SCTP stack (RFC4960) 50/9150/74
author    Marco Varlese <marco.varlese@suse.com>
Mon, 30 Oct 2017 17:17:21 +0000 (18:17 +0100)
committer Florin Coras <florin.coras@gmail.com>
Wed, 24 Jan 2018 15:45:35 +0000 (15:45 +0000)
== CONTENT ==
* SCTP chunk definitions as per RFC 4960;
* Helper functions to set/get values in the corresponding chunks;
* Hooks into the session/application layers;
* Complete state-machine handling;
* Handling of unexpected chunks received in a given state
  (state-machine error handling);
* Support for a single connection;
* Sample applications to test the receive/transmit data path
  (see the usage sketch below);
* Test to validate the SCTP stack;
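
A rough usage sketch for the sample applications, based on the debug CLI
commands added by this patch ("test sctp server" and "test sctp clients").
The "vpp#" prompt is illustrative only; the client defaults to the
sctp://6.0.1.1/1234 URI while the server binds sctp://0.0.0.0/1234, so the
URI may need to be adapted to a locally configured address:

    vpp# test sctp server
    vpp# test sctp clients nclients 1 bytes 8192 test-bytes uri sctp://6.0.1.1/1234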

Change-Id: I1b55c455ab400be9513f4e094dadfc3181d2ebc9
Signed-off-by: Marco Varlese <marco.varlese@suse.com>
23 files changed:
MAINTAINERS
src/vnet.am
src/vnet/buffer.h
src/vnet/ip/format.h
src/vnet/ip/punt.c
src/vnet/ipsec/ipsec_output.c
src/vnet/sctp/builtin_client.c [new file with mode: 0644]
src/vnet/sctp/builtin_client.h [new file with mode: 0644]
src/vnet/sctp/builtin_server.c [new file with mode: 0644]
src/vnet/sctp/sctp.c [new file with mode: 0644]
src/vnet/sctp/sctp.h [new file with mode: 0644]
src/vnet/sctp/sctp_debug.h [new file with mode: 0644]
src/vnet/sctp/sctp_error.def [new file with mode: 0644]
src/vnet/sctp/sctp_format.c [new file with mode: 0644]
src/vnet/sctp/sctp_input.c [new file with mode: 0644]
src/vnet/sctp/sctp_output.c [new file with mode: 0644]
src/vnet/sctp/sctp_packet.h [new file with mode: 0644]
src/vnet/sctp/sctp_pg.c [new file with mode: 0644]
src/vnet/sctp/sctp_timer.h [new file with mode: 0644]
src/vnet/session/application_interface.c
src/vnet/session/transport.c
src/vnet/session/transport.h
test/test_sctp.py [new file with mode: 0644]

index 81b34f8..b26b044 100644 (file)
@@ -117,6 +117,10 @@ VNET TCP Stack
 M:     Florin Coras <fcoras@cisco.com>
 F:     src/vnet/tcp
 
+VNET SCTP Stack
+M:     Marco Varlese <mvarlese@suse.de>
+F:     src/vnet/sctp
+
 VNET VXLAN
 M:     John Lo <loj@cisco.com>
 F:     src/vnet/vxlan/
@@ -127,7 +131,7 @@ M:  Hongjun Ni <hongjun.ni@intel.com>
 F:     src/vnet/vxlan-gpe/
 
 VNET GENEVE
-M:     Marco Varlese <marco.varlese@suse.com>
+M:     Marco Varlese <mvarlese@suse.de>
 F:     src/vnet/geneve/
 
 Plugin - ACL
index 32d3167..52b4329 100644 (file)
@@ -542,6 +542,25 @@ nobase_include_HEADERS +=                  \
 
 API_FILES += vnet/udp/udp.api
 
+########################################
+# Layer 4 protocol: sctp
+########################################
+libvnet_la_SOURCES +=                          \
+  vnet/sctp/sctp.c                             \
+  vnet/sctp/sctp_pg.c                          \
+  vnet/sctp/sctp_input.c                       \
+  vnet/sctp/sctp_output.c                      \
+  vnet/sctp/sctp_format.c                      \
+  vnet/sctp/builtin_server.c                   \
+  vnet/sctp/builtin_client.c
+
+nobase_include_HEADERS +=                      \
+  vnet/sctp/sctp_error.def                     \
+  vnet/sctp/sctp_packet.h                      \
+  vnet/sctp/sctp_timer.h                       \
+  vnet/sctp/sctp.h                             \
+  vnet/sctp/builtin_client.h
+
 ########################################
 # Tunnel protocol: gre
 ########################################
index 317f8bb..097f68f 100644 (file)
@@ -329,6 +329,19 @@ typedef struct
       u8 flags;
     } tcp;
 
+    /* SCTP */
+    struct
+    {
+      u32 connection_index;
+      u16 sid; /**< Stream ID */
+      u16 ssn; /**< Stream Sequence Number */
+      u32 tsn; /**< Transmission Sequence Number */
+      u16 hdr_offset;          /**< offset relative to ip hdr */
+      u16 data_offset;         /**< offset relative to ip hdr */
+      u16 data_len;            /**< data len */
+      u8 flags;
+    } sctp;
+
     /* SNAT */
     struct
     {
index c35f0f4..d527e31 100644 (file)
@@ -99,9 +99,10 @@ format_function_t format_ip6_header;
 unformat_function_t unformat_pg_ip6_header;
 
 /* Format a TCP/UDP headers. */
-format_function_t format_tcp_header, format_udp_header;
+format_function_t format_tcp_header, format_udp_header, format_sctp_header;
 
-unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header;
+unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header,
+  unformat_pg_sctp_header;
 
 #endif /* included_ip_format_h */
 
index b417427..4a027bf 100644 (file)
@@ -27,6 +27,7 @@
 #include <vnet/pg/pg.h>
 #include <vnet/udp/udp.h>
 #include <vnet/tcp/tcp.h>
+#include <vnet/sctp/sctp.h>
 #include <vnet/ip/punt.h>
 #include <vppinfra/sparse_vec.h>
 #include <vlib/unix/unix.h>
@@ -689,11 +690,13 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
                   bool is_add)
 {
 
-  /* For now we only support UDP punt */
-  if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP)
+  /* For now we only support TCP, UDP and SCTP punt */
+  if (protocol != IP_PROTOCOL_UDP &&
+      protocol != IP_PROTOCOL_TCP && protocol != IP_PROTOCOL_SCTP)
     return clib_error_return (0,
-                             "only UDP (%d) and TCP (%d) protocols are supported, got %d",
-                             IP_PROTOCOL_UDP, IP_PROTOCOL_TCP, protocol);
+                             "only UDP (%d), TCP (%d) and SCTP (%d) protocols are supported, got %d",
+                             IP_PROTOCOL_UDP, IP_PROTOCOL_TCP,
+                             IP_PROTOCOL_SCTP, protocol);
 
   if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6)
     return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv);
@@ -706,6 +709,8 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
            udp_punt_unknown (vm, 1, is_add);
          else if (protocol == IP_PROTOCOL_TCP)
            tcp_punt_unknown (vm, 1, is_add);
+         else if (protocol == IP_PROTOCOL_SCTP)
+           sctp_punt_unknown (vm, 1, is_add);
        }
 
       if ((ipv == 6) || (ipv == (u8) ~ 0))
@@ -714,6 +719,8 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
            udp_punt_unknown (vm, 0, is_add);
          else if (protocol == IP_PROTOCOL_TCP)
            tcp_punt_unknown (vm, 0, is_add);
+         else if (protocol == IP_PROTOCOL_SCTP)
+           sctp_punt_unknown (vm, 0, is_add);
        }
 
       return 0;
@@ -721,8 +728,9 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port,
 
   else if (is_add)
     {
-      if (protocol == IP_PROTOCOL_TCP)
-       return clib_error_return (0, "punt TCP ports is not supported yet");
+      if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_SCTP)
+       return clib_error_return (0,
+                                 "punt TCP/SCTP ports is not supported yet");
 
       if (ipv == 4 || ipv == (u8) ~ 0)
        udp_register_dst_port (vm, port, udp4_punt_node.index, 1);
index e86292c..d56b665 100644 (file)
@@ -100,7 +100,9 @@ ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp,
     if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32))
       continue;
 
-    if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)))
+    if (PREDICT_FALSE
+       ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)
+        && (pr != IP_PROTOCOL_SCTP)))
       return p;
 
     if (lp < p->lport.start)
@@ -153,7 +155,9 @@ ipsec_output_ip6_policy_match (ipsec_spd_t * spd,
     if (!ip6_addr_match_range (la, &p->laddr.start.ip6, &p->laddr.stop.ip6))
       continue;
 
-    if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)))
+    if (PREDICT_FALSE
+       ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP)
+        && (pr != IP_PROTOCOL_SCTP)))
       return p;
 
     if (lp < p->lport.start)
diff --git a/src/vnet/sctp/builtin_client.c b/src/vnet/sctp/builtin_client.c
new file mode 100644 (file)
index 0000000..4e50c0a
--- /dev/null
@@ -0,0 +1,834 @@
+/*
+ * Copyright (c) 2018 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/sctp/builtin_client.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vpp/app/version.h>
+
+tclient_main_t tclient_main;
+
+#define SCTP_BUILTIN_CLIENT_DBG (0)
+
+static void
+signal_evt_to_cli_i (int *code)
+{
+  tclient_main_t *tm = &tclient_main;
+  ASSERT (vlib_get_thread_index () == 0);
+  vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, *code, 0);
+}
+
+static void
+signal_evt_to_cli (int code)
+{
+  if (vlib_get_thread_index () != 0)
+    vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code,
+                                sizeof (code));
+  else
+    signal_evt_to_cli_i (&code);
+}
+
+static void
+send_test_chunk (tclient_main_t * tm, session_t * s)
+{
+  u8 *test_data = tm->connect_test_data;
+  int test_buf_offset;
+  u32 bytes_this_chunk;
+  session_fifo_event_t evt;
+  svm_fifo_t *txf;
+  int rv;
+
+  ASSERT (vec_len (test_data) > 0);
+
+  test_buf_offset = s->bytes_sent % vec_len (test_data);
+  bytes_this_chunk = vec_len (test_data) - test_buf_offset;
+  bytes_this_chunk = bytes_this_chunk < s->bytes_to_send
+    ? bytes_this_chunk : s->bytes_to_send;
+
+  txf = s->server_tx_fifo;
+  rv = svm_fifo_enqueue_nowait (txf, bytes_this_chunk,
+                               test_data + test_buf_offset);
+
+  /* If we managed to enqueue data... */
+  if (rv > 0)
+    {
+      /* Account for it... */
+      s->bytes_to_send -= rv;
+      s->bytes_sent += rv;
+
+      if (SCTP_BUILTIN_CLIENT_DBG)
+       {
+          /* *INDENT-OFF* */
+          ELOG_TYPE_DECLARE (e) =
+            {
+              .format = "tx-enq: xfer %d bytes, sent %u remain %u",
+              .format_args = "i4i4i4",
+            };
+          /* *INDENT-ON* */
+         struct
+         {
+           u32 data[3];
+         } *ed;
+         ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+         ed->data[0] = rv;
+         ed->data[1] = s->bytes_sent;
+         ed->data[2] = s->bytes_to_send;
+       }
+
+      /* Poke the session layer */
+      if (svm_fifo_set_event (txf))
+       {
+         /* Fabricate TX event, send to vpp */
+         evt.fifo = txf;
+         evt.event_type = FIFO_EVENT_APP_TX;
+
+         if (svm_queue_add
+             (tm->vpp_event_queue[txf->master_thread_index], (u8 *) & evt,
+              0 /* do wait for mutex */ ))
+           clib_warning ("could not enqueue event");
+       }
+    }
+}
+
+static void
+receive_test_chunk (tclient_main_t * tm, session_t * s)
+{
+  svm_fifo_t *rx_fifo = s->server_rx_fifo;
+  u32 my_thread_index = vlib_get_thread_index ();
+  int n_read, i;
+
+  /* Allow enqueuing of new event */
+  // svm_fifo_unset_event (rx_fifo);
+
+  if (tm->test_bytes)
+    {
+      n_read = svm_fifo_dequeue_nowait (rx_fifo,
+                                       vec_len (tm->rx_buf[my_thread_index]),
+                                       tm->rx_buf[my_thread_index]);
+    }
+  else
+    {
+      n_read = svm_fifo_max_dequeue (rx_fifo);
+      svm_fifo_dequeue_drop (rx_fifo, n_read);
+    }
+
+  if (SCTP_BUILTIN_CLIENT_DBG)
+    clib_warning ("Receiving test chunk; n_read = %d", n_read);
+
+  if (n_read > 0)
+    {
+      if (SCTP_BUILTIN_CLIENT_DBG)
+       {
+          /* *INDENT-OFF* */
+          ELOG_TYPE_DECLARE (e) =
+            {
+              .format = "rx-deq: %d bytes",
+              .format_args = "i4",
+            };
+          /* *INDENT-ON* */
+         struct
+         {
+           u32 data[1];
+         } *ed;
+         ed = ELOG_DATA (&vlib_global_main.elog_main, e);
+         ed->data[0] = n_read;
+       }
+
+      if (tm->test_bytes)
+       {
+         for (i = 0; i < n_read; i++)
+           {
+             if (tm->rx_buf[my_thread_index][i]
+                 != ((s->bytes_received + i) & 0xff))
+               {
+                 clib_warning ("read %d error at byte %lld, 0x%x not 0x%x",
+                               n_read, s->bytes_received + i,
+                               tm->rx_buf[my_thread_index][i],
+                               ((s->bytes_received + i) & 0xff));
+                 tm->test_failed = 1;
+               }
+           }
+       }
+
+      if (s->bytes_to_receive < n_read)
+       {
+         s->bytes_to_receive = 0;
+         s->bytes_received += n_read;
+       }
+      else
+       {
+         s->bytes_to_receive -= n_read;
+         s->bytes_received += n_read;
+       }
+    }
+}
+
+static uword
+builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node,
+                       vlib_frame_t * frame)
+{
+  tclient_main_t *tm = &tclient_main;
+  int my_thread_index = vlib_get_thread_index ();
+  session_t *sp;
+  int i;
+  int delete_session;
+  u32 *connection_indices;
+  u32 *connections_this_batch;
+  u32 nconnections_this_batch;
+
+  connection_indices = tm->connection_index_by_thread[my_thread_index];
+  connections_this_batch =
+    tm->connections_this_batch_by_thread[my_thread_index];
+
+  if ((tm->run_test == 0) ||
+      ((vec_len (connection_indices) == 0)
+       && vec_len (connections_this_batch) == 0))
+    return 0;
+
+  /* Grab another pile of connections */
+  if (PREDICT_FALSE (vec_len (connections_this_batch) == 0))
+    {
+      nconnections_this_batch =
+       clib_min (tm->connections_per_batch, vec_len (connection_indices));
+
+      ASSERT (nconnections_this_batch > 0);
+      vec_validate (connections_this_batch, nconnections_this_batch - 1);
+      clib_memcpy (connections_this_batch,
+                  connection_indices + vec_len (connection_indices)
+                  - nconnections_this_batch,
+                  nconnections_this_batch * sizeof (u32));
+      _vec_len (connection_indices) -= nconnections_this_batch;
+    }
+
+  if (PREDICT_FALSE (tm->prev_conns != tm->connections_per_batch
+                    && tm->prev_conns == vec_len (connections_this_batch)))
+    {
+      tm->repeats++;
+      tm->prev_conns = vec_len (connections_this_batch);
+      if (tm->repeats == 500000)
+       {
+         clib_warning ("stuck clients");
+       }
+    }
+  else
+    {
+      tm->prev_conns = vec_len (connections_this_batch);
+      tm->repeats = 0;
+    }
+
+  for (i = 0; i < vec_len (connections_this_batch); i++)
+    {
+      delete_session = 1;
+
+      sp = pool_elt_at_index (tm->sessions, connections_this_batch[i]);
+
+      if (sp->bytes_to_send > 0)
+       {
+         send_test_chunk (tm, sp);
+         delete_session = 0;
+       }
+
+      if (sp->bytes_to_receive > 0)
+       {
+         receive_test_chunk (tm, sp);
+         delete_session = 0;
+       }
+      if (PREDICT_FALSE (delete_session == 1))
+       {
+         u32 index, thread_index;
+         stream_session_t *s;
+
+         __sync_fetch_and_add (&tm->tx_total, sp->bytes_sent);
+         __sync_fetch_and_add (&tm->rx_total, sp->bytes_received);
+
+         session_parse_handle (sp->vpp_session_handle,
+                               &index, &thread_index);
+         s = session_get_if_valid (index, thread_index);
+
+         if (s)
+           {
+             vnet_disconnect_args_t _a, *a = &_a;
+             a->handle = session_handle (s);
+             a->app_index = tm->app_index;
+             vnet_disconnect_session (a);
+
+             vec_delete (connections_this_batch, 1, i);
+             i--;
+             __sync_fetch_and_add (&tm->ready_connections, -1);
+           }
+         else
+           clib_warning ("session AWOL?");
+
+         /* Kick the debug CLI process */
+         if (tm->ready_connections == 0)
+           {
+             signal_evt_to_cli (2);
+           }
+       }
+    }
+
+  tm->connection_index_by_thread[my_thread_index] = connection_indices;
+  tm->connections_this_batch_by_thread[my_thread_index] =
+    connections_this_batch;
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (builtin_sctp_client_node) =
+{
+  .function = builtin_client_node_fn,
+  .name = "builtin-sctp-client",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .state = VLIB_NODE_STATE_DISABLED,
+};
+/* *INDENT-ON* */
+
+static int
+create_api_loopback (tclient_main_t * tm)
+{
+  api_main_t *am = &api_main;
+  vl_shmem_hdr_t *shmem_hdr;
+
+  shmem_hdr = am->shmem_hdr;
+  tm->vl_input_queue = shmem_hdr->vl_input_queue;
+  tm->my_client_index =
+    vl_api_memclnt_create_internal ("sctp_test_client", tm->vl_input_queue);
+  return 0;
+}
+
+static int
+sctp_test_clients_init (vlib_main_t * vm)
+{
+  tclient_main_t *tm = &tclient_main;
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  u32 num_threads;
+  int i;
+
+  if (create_api_loopback (tm))
+    return -1;
+
+  num_threads = 1 /* main thread */  + vtm->n_threads;
+
+  /* Init test data. Big buffer */
+  vec_validate (tm->connect_test_data, 1024 * 1024 - 1);
+  for (i = 0; i < vec_len (tm->connect_test_data); i++)
+    tm->connect_test_data[i] = i & 0xff;
+
+  vec_validate (tm->rx_buf, num_threads - 1);
+  for (i = 0; i < num_threads; i++)
+    vec_validate (tm->rx_buf[i], vec_len (tm->connect_test_data) - 1);
+
+  tm->is_init = 1;
+
+  vec_validate (tm->connection_index_by_thread, vtm->n_vlib_mains);
+  vec_validate (tm->connections_this_batch_by_thread, vtm->n_vlib_mains);
+  vec_validate (tm->vpp_event_queue, vtm->n_vlib_mains);
+
+  return 0;
+}
+
+static int
+builtin_session_connected_callback (u32 app_index, u32 api_context,
+                                   stream_session_t * s, u8 is_fail)
+{
+  tclient_main_t *tm = &tclient_main;
+  session_t *session;
+  u32 session_index;
+  u8 thread_index = vlib_get_thread_index ();
+
+  if (is_fail)
+    {
+      clib_warning ("connection %d failed!", api_context);
+      signal_evt_to_cli (-1);
+      return 0;
+    }
+
+  ASSERT (s->thread_index == thread_index);
+
+  if (!tm->vpp_event_queue[thread_index])
+    tm->vpp_event_queue[thread_index] =
+      session_manager_get_vpp_event_queue (thread_index);
+
+  /*
+   * Setup session
+   */
+  clib_spinlock_lock_if_init (&tm->sessions_lock);
+  pool_get (tm->sessions, session);
+  clib_spinlock_unlock_if_init (&tm->sessions_lock);
+
+  memset (session, 0, sizeof (*session));
+  session_index = session - tm->sessions;
+  session->bytes_to_send = tm->bytes_to_send;
+  session->bytes_to_receive = tm->no_return ? 0ULL : tm->bytes_to_send;
+  session->server_rx_fifo = s->server_rx_fifo;
+  session->server_rx_fifo->client_session_index = session_index;
+  session->server_tx_fifo = s->server_tx_fifo;
+  session->server_tx_fifo->client_session_index = session_index;
+  session->vpp_session_handle = session_handle (s);
+
+  vec_add1 (tm->connection_index_by_thread[thread_index], session_index);
+  __sync_fetch_and_add (&tm->ready_connections, 1);
+  if (tm->ready_connections == tm->expected_connections)
+    {
+      tm->run_test = 1;
+      /* Signal the CLI process that the action is starting... */
+      signal_evt_to_cli (1);
+    }
+
+  return 0;
+}
+
+static void
+builtin_session_reset_callback (stream_session_t * s)
+{
+  if (s->session_state == SESSION_STATE_READY)
+    clib_warning ("Reset active connection %U", format_stream_session, s, 2);
+  stream_session_cleanup (s);
+  return;
+}
+
+static int
+builtin_session_create_callback (stream_session_t * s)
+{
+  return 0;
+}
+
+static void
+builtin_session_disconnect_callback (stream_session_t * s)
+{
+  tclient_main_t *tm = &tclient_main;
+  vnet_disconnect_args_t _a, *a = &_a;
+  a->handle = session_handle (s);
+  a->app_index = tm->app_index;
+  vnet_disconnect_session (a);
+  return;
+}
+
+static int
+builtin_server_rx_callback (stream_session_t * s)
+{
+  return 0;
+}
+
+/* *INDENT-OFF* */
+static session_cb_vft_t builtin_clients = {
+  .session_reset_callback = builtin_session_reset_callback,
+  .session_connected_callback = builtin_session_connected_callback,
+  .session_accept_callback = builtin_session_create_callback,
+  .session_disconnect_callback = builtin_session_disconnect_callback,
+  .builtin_server_rx_callback = builtin_server_rx_callback
+};
+/* *INDENT-ON* */
+
+static clib_error_t *
+attach_builtin_test_clients_app (u8 * appns_id, u64 appns_flags,
+                                u64 appns_secret)
+{
+  u32 segment_name_length, prealloc_fifos, segment_size = 2 << 20;
+  tclient_main_t *tm = &tclient_main;
+  vnet_app_attach_args_t _a, *a = &_a;
+  u8 segment_name[128];
+  u64 options[16];
+  clib_error_t *error = 0;
+
+  segment_name_length = ARRAY_LEN (segment_name);
+
+  memset (a, 0, sizeof (*a));
+  memset (options, 0, sizeof (options));
+
+  a->api_client_index = tm->my_client_index;
+  a->segment_name = segment_name;
+  a->segment_name_length = segment_name_length;
+  a->session_cb_vft = &builtin_clients;
+
+  prealloc_fifos = tm->prealloc_fifos ? tm->expected_connections : 1;
+
+  if (tm->private_segment_size)
+    segment_size = tm->private_segment_size;
+
+  options[APP_OPTIONS_ACCEPT_COOKIE] = 0x12345678;
+  options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+  options[APP_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size;
+  options[APP_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size;
+  options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count;
+  options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos;
+
+  options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+  if (appns_id)
+    {
+      options[APP_OPTIONS_FLAGS] |= appns_flags;
+      options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret;
+    }
+  a->options = options;
+  a->namespace_id = appns_id;
+
+  if ((error = vnet_application_attach (a)))
+    return error;
+
+  tm->app_index = a->app_index;
+  return 0;
+}
+
+static void *
+tclient_thread_fn (void *arg)
+{
+  return 0;
+}
+
+/** Start a transmit thread */
+int
+start_tx_pthread_sctp (tclient_main_t * tm)
+{
+  if (tm->client_thread_handle == 0)
+    {
+      int rv = pthread_create (&tm->client_thread_handle,
+                              NULL /*attr */ ,
+                              tclient_thread_fn, 0);
+      if (rv)
+       {
+         tm->client_thread_handle = 0;
+         return -1;
+       }
+    }
+  return 0;
+}
+
+clib_error_t *
+clients_connect_sctp (vlib_main_t * vm, u8 * uri, u32 n_clients)
+{
+  tclient_main_t *tm = &tclient_main;
+  vnet_connect_args_t _a, *a = &_a;
+  clib_error_t *error = 0;
+  int i;
+  for (i = 0; i < n_clients; i++)
+    {
+      memset (a, 0, sizeof (*a));
+
+      a->uri = (char *) uri;
+      a->api_context = i;
+      a->app_index = tm->app_index;
+      a->mp = 0;
+
+      if ((error = vnet_connect_uri (a)))
+       return error;
+
+
+      /* Crude pacing for call setups  */
+      if ((i % 4) == 0)
+       vlib_process_suspend (vm, 10e-6);
+      ASSERT (i + 1 >= tm->ready_connections);
+      while (i + 1 - tm->ready_connections > 1000)
+       {
+         vlib_process_suspend (vm, 100e-6);
+       }
+    }
+  return 0;
+}
+
+#define CLI_OUTPUT(_fmt, _args...)                     \
+  if (!tm->no_output)                                          \
+    vlib_cli_output(vm, _fmt, ##_args)
+
+static clib_error_t *
+test_sctp_clients_command_fn (vlib_main_t * vm,
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
+{
+  tclient_main_t *tm = &tclient_main;
+  vlib_thread_main_t *thread_main = vlib_get_thread_main ();
+  uword *event_data = 0, event_type;
+  u8 *default_connect_uri = (u8 *) "sctp://6.0.1.1/1234", *uri, *appns_id = 0;
+  u64 tmp, total_bytes, appns_flags = 0, appns_secret = 0;
+  f64 test_timeout = 20.0, syn_timeout = 20.0, delta;
+  f64 time_before_connects;
+  u32 n_clients = 1;
+  int preallocate_sessions = 0;
+  char *transfer_type;
+  clib_error_t *error = 0;
+  int i;
+
+  tm->bytes_to_send = 8192;
+  tm->no_return = 0;
+  tm->fifo_size = 64 << 10;
+  tm->connections_per_batch = 1000;
+  tm->private_segment_count = 0;
+  tm->private_segment_size = 0;
+  tm->no_output = 0;
+  tm->test_bytes = 0;
+  tm->test_failed = 0;
+  tm->vlib_main = vm;
+  if (thread_main->n_vlib_mains > 1)
+    clib_spinlock_init (&tm->sessions_lock);
+  vec_free (tm->connect_uri);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "nclients %d", &n_clients))
+       ;
+      else if (unformat (input, "mbytes %lld", &tmp))
+       tm->bytes_to_send = tmp << 20;
+      else if (unformat (input, "gbytes %lld", &tmp))
+       tm->bytes_to_send = tmp << 30;
+      else if (unformat (input, "bytes %lld", &tm->bytes_to_send))
+       ;
+      else if (unformat (input, "uri %s", &tm->connect_uri))
+       ;
+      else if (unformat (input, "test-timeout %f", &test_timeout))
+       ;
+      else if (unformat (input, "syn-timeout %f", &syn_timeout))
+       ;
+      else if (unformat (input, "no-return"))
+       tm->no_return = 1;
+      else if (unformat (input, "fifo-size %d", &tm->fifo_size))
+       tm->fifo_size <<= 10;
+      else if (unformat (input, "private-segment-count %d",
+                        &tm->private_segment_count))
+       ;
+      else if (unformat (input, "private-segment-size %U",
+                        unformat_memory_size, &tmp))
+       {
+         if (tmp >= 0x100000000ULL)
+           return clib_error_return
+             (0, "private segment size %lld (%llu) too large", tmp, tmp);
+         tm->private_segment_size = tmp;
+       }
+      else if (unformat (input, "preallocate-fifos"))
+       tm->prealloc_fifos = 1;
+      else if (unformat (input, "preallocate-sessions"))
+       preallocate_sessions = 1;
+      else
+       if (unformat (input, "client-batch %d", &tm->connections_per_batch))
+       ;
+      else if (unformat (input, "appns %_%v%_", &appns_id))
+       ;
+      else if (unformat (input, "all-scope"))
+       appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE
+                       | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
+      else if (unformat (input, "local-scope"))
+       appns_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
+      else if (unformat (input, "global-scope"))
+       appns_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+      else if (unformat (input, "secret %lu", &appns_secret))
+       ;
+      else if (unformat (input, "no-output"))
+       tm->no_output = 1;
+      else if (unformat (input, "test-bytes"))
+       tm->test_bytes = 1;
+      else
+       return clib_error_return (0, "unknown input `%U'",
+                                 format_unformat_error, input);
+    }
+
+  /* Store cli process node index for signalling */
+  tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index;
+
+  if (tm->is_init == 0)
+    {
+      if (sctp_test_clients_init (vm))
+       return clib_error_return (0, "failed init");
+    }
+
+  tm->ready_connections = 0;
+  tm->expected_connections = n_clients;
+  tm->rx_total = 0;
+  tm->tx_total = 0;
+
+  uri = default_connect_uri;
+  if (tm->connect_uri)
+    uri = tm->connect_uri;
+
+#if SCTP_BUILTIN_CLIENT_PTHREAD
+  start_tx_pthread_sctp (tm);
+#endif
+
+  vlib_worker_thread_barrier_sync (vm);
+  vnet_session_enable_disable (vm, 1 /* turn on SCTP, etc. */ );
+  vlib_worker_thread_barrier_release (vm);
+
+  if (tm->test_client_attached == 0)
+    {
+      if ((error = attach_builtin_test_clients_app (appns_id, appns_flags,
+                                                   appns_secret)))
+       {
+         vec_free (appns_id);
+         clib_error_report (error);
+         return error;
+       }
+      vec_free (appns_id);
+    }
+  tm->test_client_attached = 1;
+
+  /* Turn on the builtin client input nodes */
+  for (i = 0; i < thread_main->n_vlib_mains; i++)
+    vlib_node_set_state (vlib_mains[i], builtin_sctp_client_node.index,
+                        VLIB_NODE_STATE_POLLING);
+
+  if (preallocate_sessions)
+    {
+      session_t *sp __attribute__ ((unused));
+      for (i = 0; i < n_clients; i++)
+       pool_get (tm->sessions, sp);
+      for (i = 0; i < n_clients; i++)
+       pool_put_index (tm->sessions, i);
+    }
+
+  /* Fire off connect requests */
+  time_before_connects = vlib_time_now (vm);
+  if ((error = clients_connect_sctp (vm, uri, n_clients)))
+    return error;
+
+  /* Park until the sessions come up, or ten seconds elapse... */
+  vlib_process_wait_for_event_or_clock (vm, syn_timeout);
+
+  event_type = vlib_process_get_events (vm, &event_data);
+  switch (event_type)
+    {
+    case ~0:
+      CLI_OUTPUT ("Timeout with only %d sessions active...",
+                 tm->ready_connections);
+      error =
+       clib_error_return (0, "failed: syn timeout (%f) with %d sessions",
+                          syn_timeout, tm->ready_connections);
+      goto cleanup;
+
+    case 1:
+      delta = vlib_time_now (vm) - time_before_connects;
+      if (delta != 0.0)
+       CLI_OUTPUT ("%d three-way handshakes in %.2f seconds %.2f/s",
+                   n_clients, delta, ((f64) n_clients) / delta);
+
+      tm->test_start_time = vlib_time_now (tm->vlib_main);
+      CLI_OUTPUT ("Test started at %.6f", tm->test_start_time);
+      break;
+
+    default:
+      CLI_OUTPUT ("unexpected event(1): %d", event_type);
+      error = clib_error_return (0, "failed: unexpected event(1): %d",
+                                event_type);
+      goto cleanup;
+    }
+
+  /* Now wait for the sessions to finish... */
+  vlib_process_wait_for_event_or_clock (vm, test_timeout);
+  event_type = vlib_process_get_events (vm, &event_data);
+  switch (event_type)
+    {
+    case ~0:
+      CLI_OUTPUT ("Timeout with %d sessions still active...",
+                 tm->ready_connections);
+      error = clib_error_return (0, "failed: timeout with %d sessions",
+                                tm->ready_connections);
+      goto cleanup;
+
+    case 2:
+      tm->test_end_time = vlib_time_now (vm);
+      CLI_OUTPUT ("Test finished at %.6f", tm->test_end_time);
+      break;
+
+    default:
+      CLI_OUTPUT ("unexpected event(2): %d", event_type);
+      error = clib_error_return (0, "failed: unexpected event(2): %d",
+                                event_type);
+      goto cleanup;
+    }
+
+  delta = tm->test_end_time - tm->test_start_time;
+
+  if (delta != 0.0)
+    {
+      total_bytes = (tm->no_return ? tm->tx_total : tm->rx_total);
+      transfer_type = tm->no_return ? "half-duplex" : "full-duplex";
+      CLI_OUTPUT ("%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds",
+                 total_bytes, total_bytes / (1ULL << 20),
+                 total_bytes / (1ULL << 30), delta);
+      CLI_OUTPUT ("%.2f bytes/second %s", ((f64) total_bytes) / (delta),
+                 transfer_type);
+      CLI_OUTPUT ("%.4f gbit/second %s",
+                 (((f64) total_bytes * 8.0) / delta / 1e9), transfer_type);
+    }
+  else
+    {
+      CLI_OUTPUT ("zero delta-t?");
+      error = clib_error_return (0, "failed: zero delta-t");
+      goto cleanup;
+    }
+
+  if (tm->test_bytes && tm->test_failed)
+    error = clib_error_return (0, "failed: test bytes");
+
+cleanup:
+  tm->run_test = 0;
+  for (i = 0; i < vec_len (tm->connection_index_by_thread); i++)
+    {
+      vec_reset_length (tm->connection_index_by_thread[i]);
+      vec_reset_length (tm->connections_this_batch_by_thread[i]);
+    }
+
+  pool_free (tm->sessions);
+
+  /* Detach the application, so we can use different fifo sizes next time */
+  if (tm->test_client_attached)
+    {
+      vnet_app_detach_args_t _da, *da = &_da;
+      int rv;
+
+      da->app_index = tm->app_index;
+      rv = vnet_application_detach (da);
+      if (rv)
+       {
+         error = clib_error_return (0, "failed: app detach");
+         CLI_OUTPUT ("WARNING: app detach failed...");
+       }
+      tm->test_client_attached = 0;
+      tm->app_index = ~0;
+    }
+  if (error)
+    CLI_OUTPUT ("test failed");
+  return error;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (test_clients_command, static) =
+{
+  .path = "test sctp clients",
+  .short_help = "test sctp clients [nclients %d] [[m|g]bytes <bytes>] "
+      "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]"
+      "[private-segment-count <count>][private-segment-size <bytes>[m|g]]"
+      "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]"
+      "[uri <sctp://ip/port>][test-bytes][no-output]",
+  .function = test_sctp_clients_command_fn,
+  .is_mp_safe = 1,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+sctp_test_clients_main_init (vlib_main_t * vm)
+{
+  tclient_main_t *tm = &tclient_main;
+  tm->is_init = 0;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (sctp_test_clients_main_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/builtin_client.h b/src/vnet/sctp/builtin_client.h
new file mode 100644 (file)
index 0000000..ecf22d8
--- /dev/null
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2018 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_tclient_h__
+#define __included_tclient_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <svm/queue.h>
+#include <svm/svm_fifo_segment.h>
+#include <vnet/session/session.h>
+#include <vnet/session/application_interface.h>
+
+typedef struct
+{
+  u64 bytes_to_send;
+  u64 bytes_sent;
+  u64 bytes_to_receive;
+  u64 bytes_received;
+
+  svm_fifo_t *server_rx_fifo;
+  svm_fifo_t *server_tx_fifo;
+
+  u64 vpp_session_handle;
+} session_t;
+
+typedef struct
+{
+  /*
+   * Application setup parameters
+   */
+  svm_queue_t *vl_input_queue;         /**< vpe input queue */
+  svm_queue_t **vpp_event_queue;
+
+  u32 cli_node_index;                  /**< cli process node index */
+  u32 my_client_index;                 /**< loopback API client handle */
+  u32 app_index;                       /**< app index after attach */
+
+  /*
+   * Configuration params
+   */
+  u8 *connect_uri;                     /**< URI for slave's connect */
+  u64 bytes_to_send;                   /**< Bytes to send */
+  u32 configured_segment_size;
+  u32 fifo_size;
+  u32 expected_connections;            /**< Number of clients/connections */
+  u32 connections_per_batch;           /**< Connections to rx/tx at once */
+  u32 private_segment_count;           /**< Number of private fifo segs */
+  u32 private_segment_size;            /**< size of private fifo segs */
+
+  /*
+   * Test state variables
+   */
+  session_t *sessions;                 /**< Session pool, shared */
+  clib_spinlock_t sessions_lock;
+  u8 **rx_buf;                         /**< intermediate rx buffers */
+  u8 *connect_test_data;               /**< Pre-computed test data */
+  u32 **connection_index_by_thread;
+  u32 **connections_this_batch_by_thread; /**< active connection batch */
+  pthread_t client_thread_handle;
+
+  volatile u32 ready_connections;
+  volatile u32 finished_connections;
+  volatile u64 rx_total;
+  volatile u64 tx_total;
+  volatile int run_test;               /**< Signal start of test */
+
+  f64 test_start_time;
+  f64 test_end_time;
+  u32 prev_conns;
+  u32 repeats;
+  /*
+   * Flags
+   */
+  u8 is_init;
+  u8 test_client_attached;
+  u8 no_return;
+  u8 test_return_packets;
+  int i_am_master;
+  int drop_packets;            /**< drop all packets */
+  u8 prealloc_fifos;           /**< Request fifo preallocation */
+  u8 no_output;
+  u8 test_bytes;
+  u8 test_failed;
+
+  /*
+   * Convenience
+   */
+  vlib_main_t *vlib_main;
+  vnet_main_t *vnet_main;
+  ethernet_main_t *ethernet_main;
+} tclient_main_t;
+
+extern tclient_main_t tclient_main;
+
+vlib_node_registration_t tclient_node;
+
+#endif /* __included_tclient_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/builtin_server.c b/src/vnet/sctp/builtin_server.c
new file mode 100644 (file)
index 0000000..81267e7
--- /dev/null
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2018 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/vnet.h>
+#include <vlibmemory/api.h>
+#include <vnet/session/application.h>
+#include <vnet/session/application_interface.h>
+
+typedef struct
+{
+  /*
+   * Server app parameters
+   */
+  svm_queue_t **vpp_queue;
+  svm_queue_t *vl_input_queue;                /**< Server's event queue */
+
+  u32 app_index;               /**< Server app index */
+  u32 my_client_index;         /**< API client handle */
+  u32 node_index;              /**< process node index for event scheduling */
+
+  /*
+   * Config params
+   */
+  u8 no_echo;                  /**< Don't echo traffic */
+  u32 fifo_size;                       /**< Fifo size */
+  u32 rcv_buffer_size;         /**< Rcv buffer size */
+  u32 prealloc_fifos;          /**< Preallocate fifos */
+  u32 private_segment_count;   /**< Number of private segments  */
+  u32 private_segment_size;    /**< Size of private segments  */
+  char *server_uri;            /**< Server URI */
+
+  /*
+   * Test state
+   */
+  u8 **rx_buf;                 /**< Per-thread RX buffer */
+  u64 byte_index;
+  u32 **rx_retries;
+
+  vlib_main_t *vlib_main;
+} builtin_server_main_t;
+
+builtin_server_main_t builtin_server_main;
+
+int
+builtin_sctp_session_accept_callback (stream_session_t * s)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+
+  bsm->vpp_queue[s->thread_index] =
+    session_manager_get_vpp_event_queue (s->thread_index);
+  s->session_state = SESSION_STATE_READY;
+  bsm->byte_index = 0;
+  vec_validate (bsm->rx_retries[s->thread_index], s->session_index);
+  bsm->rx_retries[s->thread_index][s->session_index] = 0;
+  return 0;
+}
+
+void
+builtin_sctp_session_disconnect_callback (stream_session_t * s)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  vnet_disconnect_args_t _a, *a = &_a;
+
+  a->handle = session_handle (s);
+  a->app_index = bsm->app_index;
+  vnet_disconnect_session (a);
+}
+
+void
+builtin_sctp_session_reset_callback (stream_session_t * s)
+{
+  clib_warning ("Reset session %U", format_stream_session, s, 2);
+  stream_session_cleanup (s);
+}
+
+
+int
+builtin_sctp_session_connected_callback (u32 app_index, u32 api_context,
+                                        stream_session_t * s, u8 is_fail)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+int
+builtin_sctp_add_segment_callback (u32 client_index,
+                                  const u8 * seg_name, u32 seg_size)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+int
+builtin_sctp_redirect_connect_callback (u32 client_index, void *mp)
+{
+  clib_warning ("called...");
+  return -1;
+}
+
+void
+test_bytes_sctp (builtin_server_main_t * bsm, int actual_transfer)
+{
+  int i;
+  u32 my_thread_id = vlib_get_thread_index ();
+
+  for (i = 0; i < actual_transfer; i++)
+    {
+      if (bsm->rx_buf[my_thread_id][i] != ((bsm->byte_index + i) & 0xff))
+       {
+         clib_warning ("at %lld expected %d got %d", bsm->byte_index + i,
+                       (bsm->byte_index + i) & 0xff,
+                       bsm->rx_buf[my_thread_id][i]);
+       }
+    }
+  bsm->byte_index += actual_transfer;
+}
+
+/*
+ * If no-echo, just read the data and be done with it
+ */
+int
+builtin_sctp_server_rx_callback_no_echo (stream_session_t * s)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  u32 my_thread_id = vlib_get_thread_index ();
+  int actual_transfer;
+  svm_fifo_t *rx_fifo;
+
+  rx_fifo = s->server_rx_fifo;
+
+  do
+    {
+      actual_transfer =
+       svm_fifo_dequeue_nowait (rx_fifo, bsm->rcv_buffer_size,
+                                bsm->rx_buf[my_thread_id]);
+    }
+  while (actual_transfer > 0);
+  return 0;
+}
+
+int
+builtin_sctp_server_rx_callback (stream_session_t * s)
+{
+  u32 n_written, max_dequeue, max_enqueue, max_transfer;
+  int actual_transfer;
+  svm_fifo_t *tx_fifo, *rx_fifo;
+  builtin_server_main_t *bsm = &builtin_server_main;
+  session_fifo_event_t evt;
+  u32 thread_index = vlib_get_thread_index ();
+
+  ASSERT (s->thread_index == thread_index);
+
+  rx_fifo = s->server_rx_fifo;
+  tx_fifo = s->server_tx_fifo;
+
+  ASSERT (rx_fifo->master_thread_index == thread_index);
+  ASSERT (tx_fifo->master_thread_index == thread_index);
+
+  max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo);
+  max_enqueue = svm_fifo_max_enqueue (s->server_tx_fifo);
+
+  if (PREDICT_FALSE (max_dequeue == 0))
+    return 0;
+
+  /* Number of bytes we're going to copy */
+  max_transfer = (max_dequeue < max_enqueue) ? max_dequeue : max_enqueue;
+
+  /* No space in tx fifo */
+  if (PREDICT_FALSE (max_transfer == 0))
+    {
+      /* XXX timeout for sessions that are stuck */
+
+    rx_event:
+      /* Program self-tap to retry */
+      if (svm_fifo_set_event (rx_fifo))
+       {
+         svm_queue_t *q;
+         evt.fifo = rx_fifo;
+         evt.event_type = FIFO_EVENT_BUILTIN_RX;
+
+         q = bsm->vpp_queue[thread_index];
+         if (PREDICT_FALSE (q->cursize == q->maxsize))
+           clib_warning ("out of event queue space");
+         else if (svm_queue_add (q, (u8 *) & evt, 0))
+           clib_warning ("failed to enqueue self-tap");
+
+         if (bsm->rx_retries[thread_index][s->session_index] == 500000)
+           {
+             clib_warning ("session stuck: %U", format_stream_session, s, 2);
+           }
+         if (bsm->rx_retries[thread_index][s->session_index] < 500001)
+           bsm->rx_retries[thread_index][s->session_index]++;
+       }
+
+      return 0;
+    }
+
+  _vec_len (bsm->rx_buf[thread_index]) = max_transfer;
+
+  actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer,
+                                            bsm->rx_buf[thread_index]);
+  ASSERT (actual_transfer == max_transfer);
+
+//  test_bytes (bsm, actual_transfer);
+
+  /*
+   * Echo back
+   */
+
+  n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer,
+                                      bsm->rx_buf[thread_index]);
+
+  if (n_written != max_transfer)
+    clib_warning ("short trout!");
+
+  if (svm_fifo_set_event (tx_fifo))
+    {
+      /* Fabricate TX event, send to vpp */
+      evt.fifo = tx_fifo;
+      evt.event_type = FIFO_EVENT_APP_TX;
+
+      if (svm_queue_add (bsm->vpp_queue[s->thread_index],
+                        (u8 *) & evt, 0 /* do wait for mutex */ ))
+       clib_warning ("failed to enqueue tx evt");
+    }
+
+  if (PREDICT_FALSE (n_written < max_dequeue))
+    goto rx_event;
+
+  return 0;
+}
+
+static session_cb_vft_t builtin_session_cb_vft = {
+  .session_accept_callback = builtin_sctp_session_accept_callback,
+  .session_disconnect_callback = builtin_sctp_session_disconnect_callback,
+  .session_connected_callback = builtin_sctp_session_connected_callback,
+  .add_segment_callback = builtin_sctp_add_segment_callback,
+  .redirect_connect_callback = builtin_sctp_redirect_connect_callback,
+  .builtin_server_rx_callback = builtin_sctp_server_rx_callback,
+  .session_reset_callback = builtin_sctp_session_reset_callback
+};
+
+/* Abuse VPP's input queue */
+static int
+create_api_loopback (vlib_main_t * vm)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  api_main_t *am = &api_main;
+  vl_shmem_hdr_t *shmem_hdr;
+
+  shmem_hdr = am->shmem_hdr;
+  bsm->vl_input_queue = shmem_hdr->vl_input_queue;
+  bsm->my_client_index =
+    vl_api_memclnt_create_internal ("sctp_test_server", bsm->vl_input_queue);
+  return 0;
+}
+
+static int
+server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  u8 segment_name[128];
+  u64 options[APP_OPTIONS_N_OPTIONS];
+  vnet_app_attach_args_t _a, *a = &_a;
+  u32 segment_size = 512 << 20;
+
+  memset (a, 0, sizeof (*a));
+  memset (options, 0, sizeof (options));
+
+  if (bsm->no_echo)
+    builtin_session_cb_vft.builtin_server_rx_callback =
+      builtin_sctp_server_rx_callback_no_echo;
+  else
+    builtin_session_cb_vft.builtin_server_rx_callback =
+      builtin_sctp_server_rx_callback;
+
+  if (bsm->private_segment_size)
+    segment_size = bsm->private_segment_size;
+
+  a->api_client_index = bsm->my_client_index;
+  a->session_cb_vft = &builtin_session_cb_vft;
+  a->options = options;
+  a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size;
+  a->options[APP_OPTIONS_RX_FIFO_SIZE] = bsm->fifo_size;
+  a->options[APP_OPTIONS_TX_FIFO_SIZE] = bsm->fifo_size;
+  a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bsm->private_segment_count;
+  a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] =
+    bsm->prealloc_fifos ? bsm->prealloc_fifos : 1;
+
+  a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN;
+  if (appns_id)
+    {
+      a->namespace_id = appns_id;
+      a->options[APP_OPTIONS_FLAGS] |= appns_flags;
+      a->options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret;
+    }
+  a->segment_name = segment_name;
+  a->segment_name_length = ARRAY_LEN (segment_name);
+
+  if (vnet_application_attach (a))
+    {
+      clib_warning ("failed to attach server");
+      return -1;
+    }
+  bsm->app_index = a->app_index;
+  return 0;
+}
+
+static int
+server_listen ()
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  vnet_bind_args_t _a, *a = &_a;
+  memset (a, 0, sizeof (*a));
+  a->app_index = bsm->app_index;
+  a->uri = bsm->server_uri;
+  return vnet_bind_uri (a);
+}
+
+static int
+server_create (vlib_main_t * vm, u8 * appns_id, u64 appns_flags,
+              u64 appns_secret)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  u32 num_threads;
+  int i;
+
+  if (bsm->my_client_index == (u32) ~ 0)
+    {
+      if (create_api_loopback (vm))
+       {
+         clib_warning ("failed to create api loopback");
+         return -1;
+       }
+    }
+
+  num_threads = 1 /* main thread */  + vtm->n_threads;
+  vec_validate (builtin_server_main.vpp_queue, num_threads - 1);
+  vec_validate (bsm->rx_buf, num_threads - 1);
+  vec_validate (bsm->rx_retries, num_threads - 1);
+
+  for (i = 0; i < num_threads; i++)
+    vec_validate (bsm->rx_buf[i], bsm->rcv_buffer_size);
+
+  if (server_attach (appns_id, appns_flags, appns_secret))
+    {
+      clib_warning ("failed to attach server");
+      return -1;
+    }
+  if (server_listen ())
+    {
+      clib_warning ("failed to start listening");
+      return -1;
+    }
+  return 0;
+}
+
+static clib_error_t *
+server_create_command_fn (vlib_main_t * vm, unformat_input_t * input,
+                         vlib_cli_command_t * cmd)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  u8 server_uri_set = 0, *appns_id = 0;
+  u64 tmp, appns_flags = 0, appns_secret = 0;
+  int rv;
+
+  bsm->no_echo = 0;
+  bsm->fifo_size = 64 << 10;
+  bsm->rcv_buffer_size = 128 << 10;
+  bsm->prealloc_fifos = 0;
+  bsm->private_segment_count = 0;
+  bsm->private_segment_size = 0;
+  vec_free (bsm->server_uri);
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "no-echo"))
+       bsm->no_echo = 1;
+      else if (unformat (input, "fifo-size %d", &bsm->fifo_size))
+       bsm->fifo_size <<= 10;
+      else if (unformat (input, "rcv-buf-size %d", &bsm->rcv_buffer_size))
+       ;
+      else if (unformat (input, "prealloc-fifos %d", &bsm->prealloc_fifos))
+       ;
+      else if (unformat (input, "private-segment-count %d",
+                        &bsm->private_segment_count))
+       ;
+      else if (unformat (input, "private-segment-size %U",
+                        unformat_memory_size, &tmp))
+       {
+         if (tmp >= 0x100000000ULL)
+           return clib_error_return
+             (0, "private segment size %lld (%llu) too large", tmp, tmp);
+         bsm->private_segment_size = tmp;
+       }
+      else if (unformat (input, "uri %s", &bsm->server_uri))
+       server_uri_set = 1;
+      else if (unformat (input, "appns %_%v%_", &appns_id))
+       ;
+      else if (unformat (input, "all-scope"))
+       appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE
+                       | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE);
+      else if (unformat (input, "local-scope"))
+       appns_flags |= APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE;
+      else if (unformat (input, "global-scope"))
+       appns_flags |= APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE;
+      else if (unformat (input, "secret %lu", &appns_secret))
+       ;
+      else
+       return clib_error_return (0, "unknown input `%U'",
+                                 format_unformat_error, input);
+    }
+
+  vnet_session_enable_disable (vm, 1 /* turn on SCTP, etc. */ );
+
+  if (!server_uri_set)
+    bsm->server_uri = (char *) format (0, "sctp://0.0.0.0/1234%c", 0);
+
+  rv = server_create (vm, appns_id, appns_flags, appns_secret);
+  vec_free (appns_id);
+  switch (rv)
+    {
+    case 0:
+      break;
+    default:
+      return clib_error_return (0, "server_create returned %d", rv);
+    }
+
+  return 0;
+}
+
+/* *INDENT-OFF* */
+VLIB_CLI_COMMAND (server_create_command, static) =
+{
+  .path = "test sctp server",
+  .short_help = "test sctp server [no-echo][fifo-size <kbytes>] "
+      "[rcv-buf-size <bytes>][prealloc-fifos <count>]"
+      "[private-segment-count <count>][private-segment-size <bytes[m|g]>]"
+      "[uri <sctp://ip/port>]",
+  .function = server_create_command_fn,
+};
+/* *INDENT-ON* */
+
+clib_error_t *
+builtin_sctp_server_main_init (vlib_main_t * vm)
+{
+  builtin_server_main_t *bsm = &builtin_server_main;
+  bsm->my_client_index = ~0;
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (builtin_sctp_server_main_init);
+
+/*
+* fd.io coding-style-patch-verification: ON
+*
+* Local Variables:
+* eval: (c-set-style "gnu")
+* End:
+*/
diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c
new file mode 100644 (file)
index 0000000..2e37a91
--- /dev/null
@@ -0,0 +1,848 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/sctp/sctp.h>
+#include <vnet/sctp/sctp_debug.h>
+
+sctp_main_t sctp_main;
+
+static u32
+sctp_connection_bind (u32 session_index, transport_endpoint_t * tep)
+{
+  sctp_main_t *tm = &sctp_main;
+  sctp_connection_t *listener;
+  void *iface_ip;
+
+  pool_get (tm->listener_pool, listener);
+  memset (listener, 0, sizeof (*listener));
+
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = listener;
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index =
+    listener - tm->listener_pool;
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_port = tep->port;
+
+  /* If we are provided a sw_if_index, bind using one of its IPs */
+  if (ip_is_zero (&tep->ip, 1) && tep->sw_if_index != ENDPOINT_INVALID_INDEX)
+    {
+      if ((iface_ip = ip_interface_get_first_ip (tep->sw_if_index,
+                                                tep->is_ip4)))
+       ip_set (&tep->ip, iface_ip, tep->is_ip4);
+    }
+  ip_copy (&listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_ip,
+          &tep->ip, tep->is_ip4);
+
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.is_ip4 = tep->is_ip4;
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto =
+    TRANSPORT_PROTO_SCTP;
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_s_index = session_index;
+  listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.fib_index =
+    tep->fib_index;
+  listener->state = SCTP_STATE_CLOSED;
+
+  sctp_connection_timers_init (listener);
+
+  return listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index;
+}
+
+u32
+sctp_session_bind (u32 session_index, transport_endpoint_t * tep)
+{
+  return sctp_connection_bind (session_index, tep);
+}
+
+static void
+sctp_connection_unbind (u32 listener_index)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *tc;
+
+  tc = pool_elt_at_index (tm->listener_pool, listener_index);
+
+  /* Poison the entry */
+  if (CLIB_DEBUG > 0)
+    memset (tc, 0xFA, sizeof (*tc));
+
+  pool_put_index (tm->listener_pool, listener_index);
+}
+
+u32
+sctp_session_unbind (u32 listener_index)
+{
+  sctp_connection_unbind (listener_index);
+  return 0;
+}
+
+void
+sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add)
+{
+  sctp_main_t *tm = &sctp_main;
+  if (is_ip4)
+    tm->punt_unknown4 = is_add;
+  else
+    tm->punt_unknown6 = is_add;
+}
+
+static int
+sctp_alloc_custom_local_endpoint (sctp_main_t * tm, ip46_address_t * lcl_addr,
+                                 u16 * lcl_port, u8 is_ip4)
+{
+  int index, port;
+  if (is_ip4)
+    {
+      index = tm->last_v4_address_rotor++;
+      if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses))
+       tm->last_v4_address_rotor = 0;
+      lcl_addr->ip4.as_u32 = tm->ip4_src_addresses[index].as_u32;
+    }
+  else
+    {
+      index = tm->last_v6_address_rotor++;
+      if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses))
+       tm->last_v6_address_rotor = 0;
+      clib_memcpy (&lcl_addr->ip6, &tm->ip6_src_addresses[index],
+                  sizeof (ip6_address_t));
+    }
+  port = transport_alloc_local_port (TRANSPORT_PROTO_SCTP, lcl_addr);
+  if (port < 1)
+    {
+      clib_warning ("Failed to allocate src port");
+      return -1;
+    }
+  *lcl_port = port;
+  return 0;
+}
+
+/**
+ * Initialize all connection timers as invalid
+ */
+void
+sctp_connection_timers_init (sctp_connection_t * tc)
+{
+  int i, j;
+
+  /* Set all to invalid */
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    for (j = 0; j < SCTP_N_TIMERS; j++)
+      {
+       tc->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID;
+      }
+
+  tc->rto = SCTP_RTO_INIT;
+}
+
+/**
+ * Stop all connection timers
+ */
+void
+sctp_connection_timers_reset (sctp_connection_t * tc)
+{
+  int i, j;
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    {
+      for (j = 0; j < SCTP_N_TIMERS; j++)
+       sctp_timer_reset (tc, i, j);
+    }
+}
+
+const char *sctp_fsm_states[] = {
+#define _(sym, str) str,
+  foreach_sctp_fsm_state
+#undef _
+};
+
+u8 *
+format_sctp_state (u8 * s, va_list * args)
+{
+  u32 state = va_arg (*args, u32);
+
+  if (state < SCTP_N_STATES)
+    s = format (s, "%s", sctp_fsm_states[state]);
+  else
+    s = format (s, "UNKNOWN (%d (0x%x))", state, state);
+  return s;
+}
+
+u8 *
+format_sctp_connection_id (u8 * s, va_list * args)
+{
+  /*
+     sctp_connection_t *tc = va_arg (*args, sctp_connection_t *);
+     if (!tc)
+     return s;
+     if (tc->c_is_ip4)
+     {
+     s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T",
+     format_ip4_address, &tc->c_lcl_ip4,
+     clib_net_to_host_u16 (tc->c_lcl_port), format_ip4_address,
+     &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port));
+     }
+     else
+     {
+     s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T",
+     format_ip6_address, &tc->c_lcl_ip6,
+     clib_net_to_host_u16 (tc->c_lcl_port), format_ip6_address,
+     &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port));
+     }
+   */
+  return s;
+}
+
+u8 *
+format_sctp_connection (u8 * s, va_list * args)
+{
+  sctp_connection_t *tc = va_arg (*args, sctp_connection_t *);
+  u32 verbose = va_arg (*args, u32);
+
+  if (!tc)
+    return s;
+  s = format (s, "%-50U", format_sctp_connection_id, tc);
+  if (verbose)
+    {
+      s = format (s, "%-15U", format_sctp_state, tc->state);
+    }
+
+  return s;
+}
+
+/**
+ * Initialize connection send variables.
+ */
+void
+sctp_init_snd_vars (sctp_connection_t * tc)
+{
+  u32 time_now;
+
+  /*
+   * We use the time to randomize the initial TSN (iss) and to set up the
+   * initial timestamp. Make sure it's updated, otherwise the INIT and
+   * INIT-ACK in the handshake may make it look as if time has flown in the
+   * opposite direction for us.
+   */
+  sctp_set_time_now (vlib_get_thread_index ());
+  time_now = sctp_time_now ();
+
+  tc->iss = random_u32 (&time_now);
+  tc->snd_una = tc->iss;
+  tc->snd_nxt = tc->iss + 1;
+  tc->snd_una_max = tc->snd_nxt;
+}
+
+/**
+ * Update max segment size we're able to process.
+ *
+ * The value is constrained by our interface's MTU and IP options. It is
+ * also what we advertise to our peer.
+ */
+void
+sctp_update_rcv_mss (sctp_connection_t * tc)
+{
+  /* TODO find our iface MTU */
+  tc->a_rwnd = DEFAULT_A_RWND - sizeof (sctp_full_hdr_t);
+  tc->rcv_opts.a_rwnd = tc->a_rwnd;
+  tc->rcv_a_rwnd = tc->a_rwnd; /* This will be updated by our congestion algos */
+}
+
+void
+sctp_init_mss (sctp_connection_t * tc)
+{
+  SCTP_DBG ("CONN_INDEX = %u",
+           tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index);
+
+  u16 default_a_rwnd = 536;
+  sctp_update_rcv_mss (tc);
+
+  /* TODO cache mss and consider PMTU discovery */
+  tc->snd_a_rwnd = clib_min (tc->rcv_opts.a_rwnd, tc->a_rwnd);
+
+  if (tc->snd_a_rwnd < sizeof (sctp_full_hdr_t))
+    {
+      SCTP_ADV_DBG ("tc->snd_a_rwnd < sizeof(sctp_full_hdr_t)");
+      /* Assume that at least the min default mss works */
+      tc->snd_a_rwnd = default_a_rwnd;
+      tc->rcv_opts.a_rwnd = default_a_rwnd;
+    }
+
+  ASSERT (tc->snd_a_rwnd > sizeof (sctp_full_hdr_t));
+}
+
+/** Initialize sctp connection variables
+ *
+ * Should be called after having received a message from the peer, i.e., an
+ * INIT or an INIT-ACK chunk, such that connection options have already been
+ * exchanged. */
+void
+sctp_connection_init_vars (sctp_connection_t * tc)
+{
+  sctp_init_mss (tc);
+  sctp_init_snd_vars (tc);
+}
+
+always_inline sctp_connection_t *
+sctp_sub_connection_add (u8 thread_index)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *tc = tm->connections[thread_index];
+
+  tc->sub_conn[tc->next_avail_sub_conn].connection.c_index =
+    tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index;
+  tc->sub_conn[tc->next_avail_sub_conn].connection.thread_index =
+    thread_index;
+  tc->sub_conn[tc->next_avail_sub_conn].parent = tc;
+
+  tc->next_avail_sub_conn += 1;
+
+  return tc;
+}
+
+void
+sctp_sub_connection_add_ip4 (u8 thread_index,
+                            sctp_ipv4_addr_param_t * ipv4_addr)
+{
+  sctp_connection_t *tc = sctp_sub_connection_add (thread_index);
+
+  clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip4,
+              &ipv4_addr->address, sizeof (ipv4_addr->address));
+}
+
+void
+sctp_sub_connection_add_ip6 (u8 thread_index,
+                            sctp_ipv6_addr_param_t * ipv6_addr)
+{
+  sctp_connection_t *tc = sctp_sub_connection_add (thread_index);
+
+  clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip6,
+              &ipv6_addr->address, sizeof (ipv6_addr->address));
+}
+
+sctp_connection_t *
+sctp_connection_new (u8 thread_index)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *tc;
+
+  pool_get (tm->connections[thread_index], tc);
+  memset (tc, 0, sizeof (*tc));
+  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc;
+  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index =
+    tc - tm->connections[thread_index];
+  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index = thread_index;
+  tc->local_tag = 0;
+  tc->next_avail_sub_conn = 1;
+
+  return tc;
+}
+
+sctp_connection_t *
+sctp_half_open_connection_new (u8 thread_index)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *tc = 0;
+  ASSERT (vlib_get_thread_index () == 0);
+  pool_get (tm->half_open_connections, tc);
+  memset (tc, 0, sizeof (*tc));
+  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index =
+    tc - tm->half_open_connections;
+  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc;
+  return tc;
+}
+
+static inline int
+sctp_connection_open (transport_endpoint_t * rmt)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *tc;
+  ip46_address_t lcl_addr;
+  u16 lcl_port;
+  uword thread_id;
+  int rv;
+
+  u8 idx = sctp_pick_conn_idx_on_state (SCTP_STATE_CLOSED);
+
+  /*
+   * Allocate local endpoint
+   */
+  if ((rmt->is_ip4 && vec_len (tm->ip4_src_addresses))
+      || (!rmt->is_ip4 && vec_len (tm->ip6_src_addresses)))
+    rv = sctp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port,
+                                          rmt->is_ip4);
+  else
+    rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_SCTP,
+                                        rmt, &lcl_addr, &lcl_port);
+
+  if (rv)
+    return -1;
+
+  /*
+   * Create connection and send INIT CHUNK
+   */
+  thread_id = vlib_get_thread_index ();
+  ASSERT (thread_id == 0);
+
+  clib_spinlock_lock_if_init (&tm->half_open_lock);
+  tc = sctp_half_open_connection_new (thread_id);
+
+  transport_connection_t *t_conn = &tc->sub_conn[idx].connection;
+  ip_copy (&t_conn->rmt_ip, &rmt->ip, rmt->is_ip4);
+  ip_copy (&t_conn->lcl_ip, &lcl_addr, rmt->is_ip4);
+  tc->sub_conn[idx].parent = tc;
+  t_conn->rmt_port = rmt->port;
+  t_conn->lcl_port = clib_host_to_net_u16 (lcl_port);
+  t_conn->is_ip4 = rmt->is_ip4;
+  t_conn->proto = TRANSPORT_PROTO_SCTP;
+  t_conn->fib_index = rmt->fib_index;
+
+  sctp_connection_timers_init (tc);
+  /* The other connection vars will be initialized after INIT_ACK chunk received */
+  sctp_init_snd_vars (tc);
+
+  sctp_send_init (tc);
+
+  clib_spinlock_unlock_if_init (&tm->half_open_lock);
+
+  return tc->sub_conn[idx].connection.c_index;
+}
+
+/**
+ * Cleans up connection state.
+ *
+ * No notifications.
+ */
+void
+sctp_connection_cleanup (sctp_connection_t * tc)
+{
+  sctp_main_t *tm = &sctp_main;
+  u8 i;
+
+  /* Cleanup local endpoint if this was an active connect */
+  for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+    transport_endpoint_cleanup (TRANSPORT_PROTO_SCTP,
+                               &tc->sub_conn[i].connection.lcl_ip,
+                               tc->sub_conn[i].connection.lcl_port);
+
+  /* Check if connection is not yet fully established */
+  if (tc->state == SCTP_STATE_COOKIE_WAIT)
+    {
+
+    }
+  else
+    {
+      int thread_index =
+       tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.thread_index;
+
+      /* Make sure all timers are cleared */
+      sctp_connection_timers_reset (tc);
+
+      /* Poison the entry */
+      if (CLIB_DEBUG > 0)
+       memset (tc, 0xFA, sizeof (*tc));
+      pool_put (tm->connections[thread_index], tc);
+    }
+}
+
+int
+sctp_session_open (transport_endpoint_t * tep)
+{
+  return sctp_connection_open (tep);
+}
+
+u16
+sctp_check_outstanding_data_chunks (sctp_connection_t * tc)
+{
+  return 0;                    /* Indicates no more data to be read/sent */
+}
+
+void
+sctp_connection_close (sctp_connection_t * tc)
+{
+  SCTP_DBG ("Closing connection %u...",
+           tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index);
+
+  tc->state = SCTP_STATE_SHUTDOWN_PENDING;
+
+  sctp_send_shutdown (tc);
+}
+
+void
+sctp_session_close (u32 conn_index, u32 thread_index)
+{
+  ASSERT (thread_index == 0);
+
+  sctp_connection_t *tc;
+  tc = sctp_connection_get (conn_index, thread_index);
+  sctp_connection_close (tc);
+}
+
+void
+sctp_session_cleanup (u32 conn_index, u32 thread_index)
+{
+  sctp_connection_t *tc;
+  tc = sctp_connection_get (conn_index, thread_index);
+  sctp_connection_timers_reset (tc);
+
+  /* Wait for the session tx events to clear */
+  tc->state = SCTP_STATE_CLOSED;
+}
+
+/**
+ * Update the effective segment size (snd_a_rwnd) that we can send
+ */
+void
+sctp_update_snd_mss (sctp_connection_t * tc)
+{
+  /* The overhead for the sctp_header_t and sctp_chunks_common_hdr_t
+   * (the sum equals to sctp_full_hdr_t) is already taken into account
+   * for the tc->a_rwnd computation.
+   * So let's not account it again here.
+   */
+  tc->snd_hdr_length =
+    sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_full_hdr_t);
+  tc->snd_a_rwnd =
+    clib_min (tc->a_rwnd, tc->rcv_opts.a_rwnd) - tc->snd_hdr_length;
+
+  SCTP_DBG ("tc->snd_a_rwnd = %u, tc->snd_hdr_length = %u ",
+           tc->snd_a_rwnd, tc->snd_hdr_length);
+
+  ASSERT (tc->snd_a_rwnd > 0);
+}
+
+u16
+sctp_session_send_mss (transport_connection_t * trans_conn)
+{
+  SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index);
+
+  sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn);
+
+  if (trans_conn == NULL)
+    {
+      SCTP_DBG ("trans_conn == NULL");
+      return 0;
+    }
+
+  if (tc == NULL)
+    {
+      SCTP_DBG ("tc == NULL");
+      return 0;
+    }
+  /* Ensure the advertised segment size accurately reflects the amount of
+   * data we can push in a DATA chunk, and that it tracks the current state
+   * of the connection. */
+  sctp_update_snd_mss (tc);
+
+  return tc->snd_a_rwnd;
+}
+
+u16
+sctp_snd_space (sctp_connection_t * sctp_conn)
+{
+  /* TODO: This requires a real implementation */
+  if (sctp_conn == NULL)
+    {
+      SCTP_DBG ("sctp_conn == NULL");
+      return 0;
+    }
+
+  if (sctp_conn->state != SCTP_STATE_ESTABLISHED)
+    {
+      SCTP_DBG_STATE_MACHINE
+       ("Trying to send DATA while not in SCTP_STATE_ESTABLISHED");
+      return 0;
+    }
+
+  return sctp_conn->snd_a_rwnd;
+}
+
+u32
+sctp_session_send_space (transport_connection_t * trans_conn)
+{
+  SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index);
+
+  sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn);
+
+  return sctp_snd_space (tc);
+}
+
+u32
+sctp_session_tx_fifo_offset (transport_connection_t * trans_conn)
+{
+  SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index);
+
+  sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn);
+
+  if (tc == NULL)
+    {
+      SCTP_DBG ("tc == NULL");
+      return 0;
+    }
+
+  /* This still works if fast retransmit is on */
+  return (tc->snd_nxt - tc->snd_una);
+}
+
+transport_connection_t *
+sctp_session_get_transport (u32 conn_index, u32 thread_index)
+{
+  sctp_connection_t *tc = sctp_connection_get (conn_index, thread_index);
+  return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection;
+}
+
+transport_connection_t *
+sctp_session_get_listener (u32 listener_index)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_connection_t *tc;
+  tc = pool_elt_at_index (tm->listener_pool, listener_index);
+  return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection;
+}
+
+u8 *
+format_sctp_session (u8 * s, va_list * args)
+{
+  return s;
+}
+
+u8 *
+format_sctp_listener_session (u8 * s, va_list * args)
+{
+  return s;
+}
+
+void
+sctp_timer_init_handler (u32 conn_index)
+{
+  sctp_connection_t *tc;
+
+  tc = sctp_connection_get (conn_index, vlib_get_thread_index ());
+  /* note: the connection may have already disappeared */
+  if (PREDICT_FALSE (tc == 0))
+    return;
+  ASSERT (tc->state == SCTP_STATE_COOKIE_ECHOED);
+  /* Start cleanup. App wasn't notified yet so use delete notify as
+   * opposed to delete to cleanup session layer state. */
+  stream_session_delete_notify (&tc->
+                               sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection);
+  tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].timers[SCTP_TIMER_T1_INIT] =
+    SCTP_TIMER_HANDLE_INVALID;
+
+  sctp_connection_cleanup (tc);
+}
+
+/* *INDENT-OFF* */
+static timer_expiration_handler *sctp_timer_expiration_handlers[SCTP_N_TIMERS]
+  = {
+  sctp_timer_init_handler
+};
+
+/* *INDENT-ON* */
+
+static void
+sctp_expired_timers_dispatch (u32 * expired_timers)
+{
+  int i;
+  u32 connection_index, timer_id;
+
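+  /* A timer-wheel handle packs the connection index in the low 28 bits and
+   * the timer id in the top 4 bits (16 timers per object). */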
+  for (i = 0; i < vec_len (expired_timers); i++)
+    {
+      /* Get session index and timer id */
+      connection_index = expired_timers[i] & 0x0FFFFFFF;
+      timer_id = expired_timers[i] >> 28;
+
+      /* Handle expiration */
+      (*sctp_timer_expiration_handlers[timer_id]) (connection_index);
+    }
+}
+
+void
+sctp_initialize_timer_wheels (sctp_main_t * tm)
+{
+  tw_timer_wheel_16t_2w_512sl_t *tw;
+  /* *INDENT-OFF* */
+  foreach_vlib_main (({
+    tw = &tm->timer_wheels[ii];
+    tw_timer_wheel_init_16t_2w_512sl (tw, sctp_expired_timers_dispatch,
+                                     100e-3 /* timer period 100ms */ , ~0);
+    tw->last_run_time = vlib_time_now (this_vlib_main);
+  }));
+  /* *INDENT-ON* */
+}
+
+clib_error_t *
+sctp_main_enable (vlib_main_t * vm)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_thread_main_t *vtm = vlib_get_thread_main ();
+  clib_error_t *error = 0;
+  u32 num_threads;
+  int thread;
+  sctp_connection_t *tc __attribute__ ((unused));
+  u32 preallocated_connections_per_thread;
+
+  if ((error = vlib_call_init_function (vm, ip_main_init)))
+    return error;
+  if ((error = vlib_call_init_function (vm, ip4_lookup_init)))
+    return error;
+  if ((error = vlib_call_init_function (vm, ip6_lookup_init)))
+    return error;
+
+  /*
+   * Registrations
+   */
+
+  ip4_register_protocol (IP_PROTOCOL_SCTP, sctp4_input_node.index);
+  ip6_register_protocol (IP_PROTOCOL_SCTP, sctp6_input_node.index);
+
+  /*
+   * Initialize data structures
+   */
+
+  num_threads = 1 /* main thread */  + vtm->n_threads;
+  vec_validate (tm->connections, num_threads - 1);
+
+  /*
+   * Preallocate connections. Assume that thread 0 won't
+   * use preallocated connections when running multi-core
+   */
+  if (num_threads == 1)
+    {
+      thread = 0;
+      preallocated_connections_per_thread = tm->preallocated_connections;
+    }
+  else
+    {
+      thread = 1;
+      preallocated_connections_per_thread =
+       tm->preallocated_connections / (num_threads - 1);
+    }
+  for (; thread < num_threads; thread++)
+    {
+      if (preallocated_connections_per_thread)
+       pool_init_fixed (tm->connections[thread],
+                        preallocated_connections_per_thread);
+    }
+
+  /* Initialize per worker thread tx buffers (used for control messages) */
+  vec_validate (tm->tx_buffers, num_threads - 1);
+
+  /* Initialize timer wheels */
+  vec_validate (tm->timer_wheels, num_threads - 1);
+  sctp_initialize_timer_wheels (tm);
+
+  /* Initialize clocks per tick for SCTP timestamp. Used to compute
+   * monotonically increasing timestamps. */
+  tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock
+    / SCTP_TSTAMP_RESOLUTION;
+
+  if (num_threads > 1)
+    {
+    }
+
+  vec_validate (tm->tx_frames[0], num_threads - 1);
+  vec_validate (tm->tx_frames[1], num_threads - 1);
+  vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1);
+  vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1);
+
+  tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size
+    (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX);
+
+  vec_validate (tm->time_now, num_threads - 1);
+  return error;
+}
+
+clib_error_t *
+sctp_enable_disable (vlib_main_t * vm, u8 is_en)
+{
+  if (is_en)
+    {
+      if (sctp_main.is_enabled)
+       return 0;
+
+      return sctp_main_enable (vm);
+    }
+  else
+    {
+      sctp_main.is_enabled = 0;
+    }
+
+  return 0;
+}
+
+transport_connection_t *
+sctp_half_open_session_get_transport (u32 conn_index)
+{
+  sctp_connection_t *tc = sctp_half_open_connection_get (conn_index);
+  return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection;
+}
+
+u8 *
+format_sctp_half_open (u8 * s, va_list * args)
+{
+  u32 tci = va_arg (*args, u32);
+  sctp_connection_t *tc = sctp_half_open_connection_get (tci);
+  return format (s, "%U", format_sctp_connection_id, tc);
+}
+
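+/* Virtual function table through which the session layer drives the SCTP
+ * transport (bind/unbind, open/close, tx sizing and formatting hooks). */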
+/* *INDENT-OFF* */
+const static transport_proto_vft_t sctp_proto = {
+  .enable = sctp_enable_disable,
+  .bind = sctp_session_bind,
+  .unbind = sctp_session_unbind,
+  .open = sctp_session_open,
+  .close = sctp_session_close,
+  .cleanup = sctp_session_cleanup,
+  .push_header = sctp_push_header,
+  .send_mss = sctp_session_send_mss,
+  .send_space = sctp_session_send_space,
+  .tx_fifo_offset = NULL,      //sctp_session_tx_fifo_offset,
+  .get_connection = sctp_session_get_transport,
+  .get_listener = sctp_session_get_listener,
+  .get_half_open = sctp_half_open_session_get_transport,
+  .format_connection = format_sctp_session,
+  .format_listener = format_sctp_listener_session,
+  .format_half_open = format_sctp_half_open,
+};
+
+/* *INDENT-ON* */
+
+clib_error_t *
+sctp_init (vlib_main_t * vm)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  ip_main_t *im = &ip_main;
+  ip_protocol_info_t *pi;
+  /* Session layer, and by implication SCTP, are disabled by default */
+  tm->is_enabled = 0;
+
+  /* Register with IP for header parsing */
+  pi = ip_get_protocol_info (im, IP_PROTOCOL_SCTP);
+  if (pi == 0)
+    return clib_error_return (0, "SCTP protocol info AWOL");
+  pi->format_header = format_sctp_header;
+  pi->unformat_pg_edit = unformat_pg_sctp_header;
+
+  /* Register as transport with session layer */
+  transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto,
+                              FIB_PROTOCOL_IP4, sctp4_output_node.index);
+  transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto,
+                              FIB_PROTOCOL_IP6, sctp6_output_node.index);
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (sctp_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp.h b/src/vnet/sctp/sctp.h
new file mode 100644 (file)
index 0000000..7c4df30
--- /dev/null
@@ -0,0 +1,645 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_sctp_h
+#define included_vnet_sctp_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/sctp/sctp_timer.h>
+#include <vnet/sctp/sctp_packet.h>
+#include <vnet/session/transport.h>
+#include <vnet/session/session.h>
+
+/* SCTP timers */
+#define foreach_sctp_timer                     \
+  _(T1_INIT, "T1_INIT")                                \
+  _(T1_COOKIE, "T1_COOKIE")                            \
+  _(T2_SHUTDOWN, "T2_SHUTDOWN")                \
+  _(T3_RXTX, "T3_RXTX")                                        \
+  _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")
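+
+/* T1_INIT/T1_COOKIE guard association setup, T2_SHUTDOWN and
+ * T5_SHUTDOWN_GUARD guard the teardown, and T3_RXTX is the data
+ * retransmission timer (RFC4960, section 6.3). */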
+
+typedef enum _sctp_timers
+{
+#define _(sym, str) SCTP_TIMER_##sym,
+  foreach_sctp_timer
+#undef _
+  SCTP_N_TIMERS
+} sctp_timers_e;
+
+#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
+
+typedef enum _sctp_error
+{
+#define sctp_error(n,s) SCTP_ERROR_##n,
+#include <vnet/sctp/sctp_error.def>
+#undef sctp_error
+  SCTP_N_ERROR,
+} sctp_error_t;
+
+#define NO_FLAG 0
+
+#define IS_T_BIT_SET(var) ((var) & (1))
+#define IS_E_BIT_SET(var) ((var) & (1))
+#define IS_B_BIT_SET(var) ((var) & (1<<1))
+#define IS_U_BIT_SET(var) ((var) & (1<<2))
+
+#define MAX_SCTP_CONNECTIONS 32
+#define MAIN_SCTP_SUB_CONN_IDX 0
+
+#if (VLIB_BUFFER_TRACE_TRAJECTORY)
+#define sctp_trajectory_add_start(b, start)                    \
+{                                                              \
+    (*vlib_buffer_trace_trajectory_cb) (b, start);             \
+}
+#else
+#define sctp_trajectory_add_start(b, start)
+#endif
+
+typedef struct _sctp_sub_connection
+{
+  transport_connection_t connection;         /**< Common transport data. First! */
+  void *parent;                                                                /**< Link to the parent-super connection */
+  u32 timers[SCTP_N_TIMERS];           /**< Timer handles into timer wheel */
+
+} sctp_sub_connection_t;
+
+typedef struct
+{
+  u32 a_rwnd;          /**< Advertised receiver window credit (a_rwnd) */
+
+} sctp_options_t;
+
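+/* An association is modelled as a single sctp_connection_t embedding up to
+ * MAX_SCTP_CONNECTIONS sub-connections, one per transport address pair
+ * (multi-homing); sub_conn[MAIN_SCTP_SUB_CONN_IDX] is the primary path. */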
+typedef struct _sctp_connection
+{
+  sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS];               /**< Sub-connections; embed the common transport data. Must be first! */
+
+  u8 state;                    /**< SCTP state as per sctp_state_t */
+  u16 flags;           /**< Chunk flag (see sctp_chunks_common_hdr_t) */
+  u32 local_tag;       /**< INIT_TAG generated locally */
+  u32 remote_tag;      /**< INIT_TAG generated by the remote peer */
+  u16 life_span_inc;
+
+  /** Send sequence variables RFC4960 */
+  u32 snd_una;         /**< oldest unacknowledged sequence number */
+  u32 snd_una_max;     /**< newest unacknowledged sequence number + 1*/
+  u32 snd_wl1;         /**< seq number used for last snd.wnd update */
+  u32 snd_wl2;         /**< ack number used for last snd.wnd update */
+  u32 snd_nxt;         /**< next seq number to be sent */
+
+  /** Receive sequence variables RFC4960 */
+  u32 rcv_nxt;         /**< next sequence number expected */
+  u32 rcv_las;         /**< rcv_nxt at last ack sent/rcv_wnd update */
+  u32 iss;             /**< initial sent sequence */
+  u32 irs;             /**< initial remote sequence */
+
+  /* RTT and RTO */
+  u32 rto;             /**< Retransmission timeout */
+  u32 rto_boff;                /**< Index for RTO backoff */
+  u32 srtt;            /**< Smoothed RTT */
+  u32 rttvar;          /**< Smoothed mean RTT difference. Approximates variance */
+  u32 rtt_ts;          /**< Timestamp for tracked ACK */
+  u32 rtt_seq;         /**< Sequence number for tracked ACK */
+
+  u32 a_rwnd;                  /**< Local a_rwnd, constrained by medium / IP / etc. */
+  u32 rcv_a_rwnd;              /**< LOCAL advertised receiver window. To be updated by congestion algos, etc. */
+  u32 snd_a_rwnd;              /**< REMOTE advertised receiver window, i.e. how much we may send. Updated if the peer pushes back on the window. */
+  sctp_options_t rcv_opts;
+  sctp_options_t snd_opts;
+  u32 snd_hdr_length;  /**< BASE HEADER LENGTH for the DATA chunk when sending */
+
+  u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */
+} sctp_connection_t;
+
+typedef void (timer_expiration_handler) (u32 index);
+
+sctp_connection_t *sctp_connection_new (u8 thread_index);
+void sctp_sub_connection_add_ip4 (u8 thread_index,
+                                 sctp_ipv4_addr_param_t * ipv4_addr);
+void sctp_sub_connection_add_ip6 (u8 thread_index,
+                                 sctp_ipv6_addr_param_t * ipv6_addr);
+void sctp_connection_close (sctp_connection_t * tc);
+void sctp_connection_cleanup (sctp_connection_t * tc);
+void sctp_connection_del (sctp_connection_t * tc);
+
+u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
+void sctp_send_init (sctp_connection_t * tc);
+void sctp_send_shutdown (sctp_connection_t * tc);
+void sctp_send_shutdown_ack (sctp_connection_t * tc);
+void sctp_send_shutdown_complete (sctp_connection_t * tc);
+void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
+                                u8 is_ip4);
+void sctp_flush_frames_to_output (u8 thread_index);
+void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
+
+format_function_t format_sctp_state;
+
+u8 *format_sctp_connection_id (u8 * s, va_list * args);
+u8 *format_sctp_connection (u8 * s, va_list * args);
+u8 *format_sctp_scoreboard (u8 * s, va_list * args);
+u8 *format_sctp_header (u8 * s, va_list * args);
+u8 *format_sctp_tx_trace (u8 * s, va_list * args);
+
+clib_error_t *sctp_init (vlib_main_t * vm);
+void sctp_connection_timers_init (sctp_connection_t * tc);
+void sctp_connection_timers_reset (sctp_connection_t * tc);
+void sctp_init_snd_vars (sctp_connection_t * tc);
+void sctp_connection_init_vars (sctp_connection_t * tc);
+
+void sctp_prepare_initack_chunk (sctp_connection_t * ts, vlib_buffer_t * b,
+                                ip4_address_t * ip4_addr,
+                                ip6_address_t * ip6_addr);
+void sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc,
+                                    vlib_buffer_t * b,
+                                    sctp_state_cookie_param_t * sc);
+void sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc,
+                                   vlib_buffer_t * b);
+void sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b);
+
+u16 sctp_check_outstanding_data_chunks (sctp_connection_t * tc);
+
+#define SCTP_TICK 0.001                        /**< SCTP tick period (s) */
+#define STHZ (u32) (1/SCTP_TICK)               /**< SCTP tick frequency */
+#define SCTP_TSTAMP_RESOLUTION SCTP_TICK       /**< Time stamp resolution */
+#define SCTP_PAWS_IDLE 24 * 24 * 60 * 60 * STHZ /**< 24 days */
+#define SCTP_FIB_RECHECK_PERIOD        1 * STHZ /**< Recheck every 1s */
+#define SCTP_MAX_OPTION_SPACE 40
+
+#define SCTP_DUPACK_THRESHOLD  3
+#define SCTP_MAX_RX_FIFO_SIZE  4 << 20
+#define SCTP_MIN_RX_FIFO_SIZE  4 << 10
+#define SCTP_IW_N_SEGMENTS     10
+#define SCTP_ALWAYS_ACK                1       /**< On/off delayed acks */
+#define SCTP_USE_SACKS         1       /**< Disable only for testing */
+
+#define IP_PROTOCOL_SCTP       132
+
+/** SCTP FSM state definitions as per RFC4960. */
+#define foreach_sctp_fsm_state                \
+  _(CLOSED, "CLOSED")                         \
+  _(COOKIE_WAIT, "COOKIE_WAIT")               \
+  _(COOKIE_ECHOED, "COOKIE_ECHOED")           \
+  _(ESTABLISHED, "ESTABLISHED")               \
+  _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")     \
+  _(SHUTDOWN_SENT, "SHUTDOWN_SENT")           \
+  _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")   \
+  _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")
+
+typedef enum _sctp_state
+{
+#define _(sym, str) SCTP_STATE_##sym,
+  foreach_sctp_fsm_state
+#undef _
+  SCTP_N_STATES
+} sctp_state_t;
+
+always_inline char *
+sctp_state_to_string (u8 state)
+{
+  switch (state)
+    {
+    case SCTP_STATE_CLOSED:
+      return "SCTP_STATE_CLOSED";
+    case SCTP_STATE_COOKIE_WAIT:
+      return "SCTP_STATE_COOKIE_WAIT";
+    case SCTP_STATE_COOKIE_ECHOED:
+      return "SCTP_STATE_COOKIE_ECHOED";
+    case SCTP_STATE_ESTABLISHED:
+      return "SCTP_STATE_ESTABLISHED";
+    case SCTP_STATE_SHUTDOWN_PENDING:
+      return "SCTP_STATE_SHUTDOWN_PENDING";
+    case SCTP_STATE_SHUTDOWN_SENT:
+      return "SCTP_STATE_SHUTDOWN_SENT";
+    case SCTP_STATE_SHUTDOWN_RECEIVED:
+      return "SCTP_STATE_SHUTDOWN_RECEIVED";
+    case SCTP_STATE_SHUTDOWN_ACK_SENT:
+      return "SCTP_STATE_SHUTDOWN_ACK_SENT";
+    }
+  return NULL;
+}
+
+always_inline char *
+sctp_chunk_to_string (u8 type)
+{
+  switch (type)
+    {
+    case DATA:
+      return "DATA";
+    case INIT:
+      return "INIT";
+    case INIT_ACK:
+      return "INIT_ACK";
+    case SACK:
+      return "SACK";
+    case HEARTBEAT:
+      return "HEARTBEAT";
+    case HEARTBEAT_ACK:
+      return "HEARTBEAT_ACK";
+    case ABORT:
+      return "ABORT";
+    case SHUTDOWN:
+      return "SHUTDOWN";
+    case SHUTDOWN_ACK:
+      return "SHUTDOWN_ACK";
+    case OPERATION_ERROR:
+      return "OPERATION_ERROR";
+    case COOKIE_ECHO:
+      return "COOKIE_ECHO";
+    case COOKIE_ACK:
+      return "COOKIE_ACK";
+    case ECNE:
+      return "ECNE";
+    case CWR:
+      return "CWR";
+    case SHUTDOWN_COMPLETE:
+      return "SHUTDOWN_COMPLETE";
+    }
+  return NULL;
+}
+
+always_inline char *
+sctp_optparam_type_to_string (u8 type)
+{
+  switch (type)
+    {
+    case SCTP_IPV4_ADDRESS_TYPE:
+      return "SCTP_IPV4_ADDRESS_TYPE";
+    case SCTP_IPV6_ADDRESS_TYPE:
+      return "SCTP_IPV6_ADDRESS_TYPE";
+    case SCTP_STATE_COOKIE_TYPE:
+      return "SCTP_STATE_COOKIE_TYPE";
+    case SCTP_UNRECOGNIZED_TYPE:
+      return "SCTP_UNRECOGNIZED_TYPE";
+    case SCTP_COOKIE_PRESERVATIVE_TYPE:
+      return "SCTP_COOKIE_PRESERVATIVE_TYPE";
+    case SCTP_HOSTNAME_ADDRESS_TYPE:
+      return "SCTP_HOSTNAME_ADDRESS_TYPE";
+    case SCTP_SUPPORTED_ADDRESS_TYPES:
+      return "SCTP_SUPPORTED_ADDRESS_TYPES";
+    }
+  return NULL;
+}
+
+#define SCTP_TICK 0.001                        /**< SCTP tick period (s) */
+#define SHZ (u32) (1/SCTP_TICK)                /**< SCTP tick frequency */
+
+/* As per RFC4960, page 83 */
+#define SCTP_RTO_INIT 3 * SHZ  /* 3 seconds */
+#define SCTP_RTO_MIN 1 * SHZ   /* 1 second */
+#define SCTP_RTO_MAX 60 * SHZ  /* 60 seconds */
+#define SCTP_RTO_BURST 4
+#define SCTP_RTO_ALPHA 1/8
+#define SCTP_RTO_BETA 1/4
+#define SCTP_VALID_COOKIE_LIFE 60 * SHZ        /* 60 seconds */
+#define SCTP_ASSOCIATION_MAX_RETRANS 10
+
+#define SCTP_TO_TIMER_TICK       SCTP_TICK*10  /* Period for converting from SCTP_TICK */
+
+typedef struct _sctp_lookup_dispatch
+{
+  u8 next, error;
+} sctp_lookup_dispatch_t;
+
+typedef struct _sctp_main
+{
+  /* Per-worker thread SCTP connection pools */
+  sctp_connection_t **connections;
+
+  /* Pool of listeners. */
+  sctp_connection_t *listener_pool;
+
+         /** Dispatch table by state and flags */
+  sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
+
+  u8 log2_tstamp_clocks_per_tick;
+  f64 tstamp_ticks_per_clock;
+  u32 *time_now;
+
+         /** per-worker tx buffer free lists */
+  u32 **tx_buffers;
+         /** per-worker tx frames to SCTP 4/6 output nodes */
+  vlib_frame_t **tx_frames[2];
+         /** per-worker tx frames to ip 4/6 lookup nodes */
+  vlib_frame_t **ip_lookup_tx_frames[2];
+
+  /* Per worker-thread timer wheel for connections timers */
+  tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
+
+  /* Pool of half-open connections on which we've sent an INIT chunk */
+  sctp_connection_t *half_open_connections;
+  clib_spinlock_t half_open_lock;
+
+  /* TODO: Congestion control algorithms registered */
+  /* sctp_cc_algorithm_t *cc_algos; */
+
+  /* Flag that indicates if stack is on or off */
+  u8 is_enabled;
+
+         /** Number of preallocated connections */
+  u32 preallocated_connections;
+
+         /** Transport table (preallocation) size parameters */
+  u32 local_endpoints_table_memory;
+  u32 local_endpoints_table_buckets;
+
+         /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
+  ip4_address_t *ip4_src_addresses;
+  u32 last_v4_address_rotor;
+  u32 last_v6_address_rotor;
+  ip6_address_t *ip6_src_addresses;
+
+         /** vlib buffer size */
+  u32 bytes_per_buffer;
+
+  u8 punt_unknown4;
+  u8 punt_unknown6;
+
+} sctp_main_t;
+
+extern sctp_main_t sctp_main;
+extern vlib_node_registration_t sctp4_input_node;
+extern vlib_node_registration_t sctp6_input_node;
+extern vlib_node_registration_t sctp4_output_node;
+extern vlib_node_registration_t sctp6_output_node;
+
+always_inline sctp_main_t *
+vnet_get_sctp_main ()
+{
+  return &sctp_main;
+}
+
+always_inline sctp_header_t *
+sctp_buffer_hdr (vlib_buffer_t * b)
+{
+  ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+  return (sctp_header_t *) (b->data + b->current_data
+                           + vnet_buffer (b)->sctp.hdr_offset);
+}
+
+clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
+
+always_inline sctp_connection_t *
+sctp_half_open_connection_get (u32 conn_index)
+{
+  sctp_connection_t *tc = 0;
+  clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
+  if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
+    {
+      tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
+      tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc;
+    }
+  clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
+  return tc;
+}
+
+/**
+ * Cleanup half-open connection
+ *
+ */
+always_inline void
+sctp_half_open_connection_del (sctp_connection_t * tc)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  clib_spinlock_lock_if_init (&tm->half_open_lock);
+  pool_put_index (tm->half_open_connections,
+                 tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index);
+  if (CLIB_DEBUG)
+    memset (tc, 0xFA, sizeof (*tc));
+  clib_spinlock_unlock_if_init (&tm->half_open_lock);
+}
+
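+/* Per-thread SCTP time in SCTP_TICK units, derived from the CPU cycle
+ * counter and the tstamp_ticks_per_clock factor computed at enable time. */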
+always_inline u32
+sctp_set_time_now (u32 thread_index)
+{
+  sctp_main.time_now[thread_index] = clib_cpu_time_now ()
+    * sctp_main.tstamp_ticks_per_clock;
+  return sctp_main.time_now[thread_index];
+}
+
+always_inline void
+sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
+               u32 interval)
+{
+  ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
+         vlib_get_thread_index ());
+  ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
+         SCTP_TIMER_HANDLE_INVALID);
+
+  sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
+  tc->sub_conn[conn_idx].timers[timer_id] =
+    tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+                                sub->c_c_index, timer_id, interval);
+}
+
+always_inline void
+sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
+{
+  ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
+  if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
+    return;
+
+  sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
+
+  tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+                             sub->timers[timer_id]);
+  sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
+}
+
+always_inline void
+sctp_update_time (f64 now, u32 thread_index)
+{
+  sctp_set_time_now (thread_index);
+  tw_timer_expire_timers_16t_2w_512sl (&sctp_main.timer_wheels[thread_index],
+                                      now);
+  sctp_flush_frames_to_output (thread_index);
+}
+
+/**
+ * Try to cleanup half-open connection
+ *
+ * If called from a thread that doesn't own tc, the call won't have any
+ * effect.
+ *
+ * @param tc - connection to be cleaned up
+ * @return non-zero if cleanup failed.
+ */
+always_inline int
+sctp_half_open_connection_cleanup (sctp_connection_t * tc)
+{
+  /* Make sure this is the owning thread */
+  if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index !=
+      vlib_get_thread_index ())
+    return 1;
+  sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT);
+  sctp_half_open_connection_del (tc);
+  return 0;
+}
+
+always_inline u32
+sctp_header_bytes ()
+{
+  return sizeof (sctp_header_t);
+}
+
+always_inline sctp_connection_t *
+sctp_get_connection_from_transport (transport_connection_t * tconn)
+{
+  ASSERT (tconn != NULL);
+
+  sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
+#if SCTP_ADV_DEBUG
+  if (sub == NULL)
+    SCTP_ADV_DBG ("sub == NULL");
+  if (sub->parent == NULL)
+    SCTP_ADV_DBG ("sub->parent == NULL");
+#endif
+  return (sctp_connection_t *) sub->parent;
+}
+
+always_inline u32
+sctp_time_now (void)
+{
+  return sctp_main.time_now[vlib_get_thread_index ()];
+}
+
+always_inline void
+sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
+                  u32 interval)
+{
+  ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
+         vlib_get_thread_index ());
+  sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
+
+  if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
+    tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+                               sub->timers[timer_id]);
+  tc->sub_conn[conn_idx].timers[timer_id] =
+    tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+                                sub->c_c_index, timer_id, interval);
+}
+
+always_inline sctp_connection_t *
+sctp_listener_get (u32 tli)
+{
+  return pool_elt_at_index (sctp_main.listener_pool, tli);
+}
+
+always_inline sctp_connection_t *
+sctp_connection_get (u32 conn_index, u32 thread_index)
+{
+  if (PREDICT_FALSE
+      (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
+    return 0;
+  return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
+}
+
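+/* For now every chunk type and every state maps to the primary
+ * sub-connection; presumably this is where multi-homed path selection
+ * would hook in later. */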
+always_inline u8
+sctp_pick_conn_idx_on_chunk (sctp_chunk_type chunk_type)
+{
+  u8 idx = MAIN_SCTP_SUB_CONN_IDX;
+
+  switch (chunk_type)
+    {
+    case DATA:
+    case INIT:
+    case INIT_ACK:
+    case SACK:
+    case HEARTBEAT:
+    case HEARTBEAT_ACK:
+    case ABORT:
+    case SHUTDOWN:
+    case SHUTDOWN_ACK:
+    case OPERATION_ERROR:
+    case COOKIE_ECHO:
+    case COOKIE_ACK:
+    case ECNE:
+    case CWR:
+    case SHUTDOWN_COMPLETE:
+      idx = MAIN_SCTP_SUB_CONN_IDX;
+    }
+  return idx;
+}
+
+always_inline u8
+sctp_pick_conn_idx_on_state (sctp_state_t state)
+{
+  u8 idx = MAIN_SCTP_SUB_CONN_IDX;
+
+  switch (state)
+    {
+    case SCTP_STATE_CLOSED:
+    case SCTP_STATE_COOKIE_WAIT:
+    case SCTP_STATE_COOKIE_ECHOED:
+    case SCTP_STATE_ESTABLISHED:
+    case SCTP_STATE_SHUTDOWN_PENDING:
+    case SCTP_STATE_SHUTDOWN_SENT:
+    case SCTP_STATE_SHUTDOWN_RECEIVED:
+    case SCTP_STATE_SHUTDOWN_ACK_SENT:
+      idx = MAIN_SCTP_SUB_CONN_IDX;
+    default:
+      idx = MAIN_SCTP_SUB_CONN_IDX;
+    }
+  return idx;
+}
+
+/**
+ * Push SCTP header to buffer
+ *
+ * @param b - buffer to write the header to
+ * @param sp - source port, net order
+ * @param dp - destination port, net order
+ * @param sctp_hdr_opts_len - header and options length in bytes
+ *
+ * @return - pointer to start of SCTP header
+ */
+always_inline void *
+vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
+                                u8 sctp_hdr_opts_len)
+{
+  sctp_full_hdr_t *full_hdr;
+
+  full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
+
+  full_hdr->hdr.src_port = sp;
+  full_hdr->hdr.dst_port = dp;
+  full_hdr->hdr.checksum = 0;
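+  /* The CRC32c checksum is left at zero here; it is expected to be filled
+   * in by the output path before the packet is sent. */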
+  return full_hdr;
+}
+
+/**
+ * Push SCTP header to buffer
+ *
+ * @param b - buffer to write the header to
+ * @param sp_net - source port net order
+ * @param dp_net - destination port net order
+ * @param sctp_hdr_opts_len - header and options length in bytes
+ *
+ * @return - pointer to start of SCTP header
+ */
+always_inline void *
+vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
+                      u8 sctp_hdr_opts_len)
+{
+  return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
+                                         sctp_hdr_opts_len);
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp_debug.h b/src/vnet/sctp/sctp_debug.h
new file mode 100644 (file)
index 0000000..b422d19
--- /dev/null
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_sctp_debug_h__
+#define included_sctp_debug_h__
+
+#include <vlib/vlib.h>
+
+typedef enum _sctp_dbg
+{
+#define _(sym, str) SCTP_DBG_##sym,
+  foreach_sctp_dbg_evt
+#undef _
+} sctp_dbg_e;
+
+#define SCTP_DEBUG_STATE_MACHINE (0)
+#if SCTP_DEBUG_STATE_MACHINE
+#define SCTP_DBG_STATE_MACHINE(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define SCTP_DBG_STATE_MACHINE(_fmt, _args...)
+#endif
+
+#define SCTP_DEBUG (0)
+#if SCTP_DEBUG
+#define SCTP_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define SCTP_DBG(_fmt, _args...)
+#endif
+
+#define SCTP_ADV_DEBUG (0)
+#if SCTP_ADV_DEBUG
+#define SCTP_ADV_DBG(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define SCTP_ADV_DBG(_fmt, _args...)
+#endif
+
+#define SCTP_DEBUG_OUTPUT (0)
+#if SCTP_DEBUG_OUTPUT
+#define SCTP_DBG_OUTPUT(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define SCTP_DBG_OUTPUT(_fmt, _args...)
+#endif
+
+#define SCTP_ADV_DEBUG_OUTPUT (0)
+#if SCTP_ADV_DEBUG_OUTPUT
+#define SCTP_ADV_DBG_OUTPUT(_fmt, _args...) clib_warning (_fmt, ##_args)
+#else
+#define SCTP_ADV_DBG_OUTPUT(_fmt, _args...)
+#endif
+
+#endif /* included_sctp_debug_h__ */
diff --git a/src/vnet/sctp/sctp_error.def b/src/vnet/sctp/sctp_error.def
new file mode 100644 (file)
index 0000000..a244fac
--- /dev/null
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+sctp_error (NONE, "no error")
+sctp_error (PKTS_SENT, "Packets sent")
+sctp_error (INVALID_CONNECTION, "Invalid connection")
+sctp_error (INVALID_TAG, "Invalid verification tag")
+sctp_error (INVALID_TAG_FOR_INIT, "Invalid verification tag for INIT chunk")
+sctp_error (CONNECTION_CLOSED, "Connection closed")
+sctp_error (ENQUEUED, "Packets pushed into rx fifo")
+sctp_error (CREATE_EXISTS, "Connection already exists")
+sctp_error (INITS_RCVD, "INITs received")
+sctp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated")
+sctp_error (NO_LISTENER, "no listener for dst port")
+sctp_error (LENGTH, "inconsistent ip/sctp lengths")
+sctp_error (DISPATCH, "Dispatch error")
+sctp_error (ACK_DUP, "Duplicate ACK")
+sctp_error (DATA_CHUNK_VIOLATION, "DATA chunk received in invalid state")
+sctp_error (INIT_CHUNK_VIOLATION, "INIT chunk received in the wrong state")
+sctp_error (INIT_ACK_CHUNK_VIOLATION, "INIT_ACK chunk received in the wrong state")
+sctp_error (SACK_CHUNK_VIOLATION, "SACK chunk received in invalid state")
+sctp_error (HEARTBEAT_CHUNK_VIOLATION, "HEARTBEAT chunk received in invalid state")
+sctp_error (HEARTBEAT_ACK_CHUNK_VIOLATION, "HEARTBEAT_ACK chunk received in invalid state")
+sctp_error (ABORT_CHUNK_VIOLATION, "ABORT chunk received in invalid state")
+sctp_error (SHUTDOWN_CHUNK_VIOLATION, "SHUTDOWN chunk received in invalid state")
+sctp_error (SHUTDOWN_ACK_CHUNK_VIOLATION, "SHUTDOWN_ACK chunk received in invalid state")
+sctp_error (OPERATION_ERROR_VIOLATION, "OPERATION_ERROR chunk received in invalid state")
+sctp_error (COOKIE_ECHO_VIOLATION, "COOKIE_ECHO chunk received in invalid state")
+sctp_error (COOKIE_ACK_VIOLATION, "COOKIE_ACK chunk received in invalid state")
+sctp_error (ECNE_VIOLATION, "ECNE chunk received in invalid state")
+sctp_error (CWR_VIOLATION, "CWR chunk received in invalid state")
+sctp_error (SHUTDOWN_COMPLETE_VIOLATION, "SHUTDOWN_COMPLETE chunk received in invalid state")
+sctp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space")
+sctp_error (PARTIALLY_ENQUEUED, "Packets partially pushed into rx fifo")
+sctp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space")
+sctp_error (UNKOWN_CHUNK, "Unrecognized / unknown chunk or chunk-state mismatch")
+sctp_error (BUNDLING_VIOLATION, "Bundling not allowed")
+sctp_error (PUNT, "Packets punted")
+sctp_error (FILTERED, "Packets filtered")
\ No newline at end of file
diff --git a/src/vnet/sctp/sctp_format.c b/src/vnet/sctp/sctp_format.c
new file mode 100644 (file)
index 0000000..49ee04d
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/sctp/sctp.h>
+
+/* Format SCTP header. */
+u8 *
+format_sctp_header (u8 * s, va_list * args)
+{
+  return s;
+}
+
+u8 *
+format_sctp_tx_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+
+  return s;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp_input.c b/src/vnet/sctp/sctp_input.c
new file mode 100644 (file)
index 0000000..4e5ea91
--- /dev/null
@@ -0,0 +1,2202 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vppinfra/sparse_vec.h>
+#include <vnet/sctp/sctp.h>
+#include <vnet/sctp/sctp_packet.h>
+#include <vnet/sctp/sctp_debug.h>
+#include <vnet/session/session.h>
+#include <math.h>
+
+static char *sctp_error_strings[] = {
+#define sctp_error(n,s) s,
+#include <vnet/sctp/sctp_error.def>
+#undef sctp_error
+};
+
+/* All SCTP nodes have the same outgoing arcs */
+#define foreach_sctp_state_next                  \
+  _ (DROP, "error-drop")                        \
+  _ (SCTP4_OUTPUT, "sctp4-output")                \
+  _ (SCTP6_OUTPUT, "sctp6-output")
+
+typedef enum _sctp_established_phase_next
+{
+#define _(s,n) SCTP_ESTABLISHED_PHASE_NEXT_##s,
+  foreach_sctp_state_next
+#undef _
+    SCTP_ESTABLISHED_PHASE_N_NEXT,
+} sctp_established_phase_next_t;
+
+typedef enum _sctp_rcv_phase_next
+{
+#define _(s,n) SCTP_RCV_PHASE_NEXT_##s,
+  foreach_sctp_state_next
+#undef _
+    SCTP_RCV_PHASE_N_NEXT,
+} sctp_rcv_phase_next_t;
+
+typedef enum _sctp_listen_phase_next
+{
+#define _(s,n) SCTP_LISTEN_PHASE_NEXT_##s,
+  foreach_sctp_state_next
+#undef _
+    SCTP_LISTEN_PHASE_N_NEXT,
+} sctp_listen_phase_next_t;
+
+typedef enum _sctp_shutdown_phase_next
+{
+#define _(s,n) SCTP_SHUTDOWN_PHASE_NEXT_##s,
+  foreach_sctp_state_next
+#undef _
+    SCTP_SHUTDOWN_PHASE_N_NEXT,
+} sctp_shutdown_phase_next_t;
+
+/* Generic, state independent indices */
+typedef enum _sctp_state_next
+{
+#define _(s,n) SCTP_NEXT_##s,
+  foreach_sctp_state_next
+#undef _
+    SCTP_STATE_N_NEXT,
+} sctp_state_next_t;
+
+typedef enum _sctp_input_next
+{
+  SCTP_INPUT_NEXT_DROP,
+  SCTP_INPUT_NEXT_LISTEN_PHASE,
+  SCTP_INPUT_NEXT_RCV_PHASE,
+  SCTP_INPUT_NEXT_ESTABLISHED_PHASE,
+  SCTP_INPUT_NEXT_SHUTDOWN_PHASE,
+  SCTP_INPUT_NEXT_PUNT_PHASE,
+  SCTP_INPUT_N_NEXT
+} sctp_input_next_t;
+
+char *
+phase_to_string (u8 phase)
+{
+  switch (phase)
+    {
+    case SCTP_INPUT_NEXT_DROP:
+      return "SCTP_INPUT_NEXT_DROP";
+    case SCTP_INPUT_NEXT_LISTEN_PHASE:
+      return "SCTP_INPUT_NEXT_LISTEN_PHASE";
+    case SCTP_INPUT_NEXT_RCV_PHASE:
+      return "SCTP_INPUT_NEXT_RCV_PHASE";
+    case SCTP_INPUT_NEXT_ESTABLISHED_PHASE:
+      return "SCTP_INPUT_NEXT_ESTABLISHED_PHASE";
+    case SCTP_INPUT_NEXT_SHUTDOWN_PHASE:
+      return "SCTP_INPUT_NEXT_SHUTDOWN_PHASE";
+    case SCTP_INPUT_NEXT_PUNT_PHASE:
+      return "SCTP_INPUT_NEXT_PUNT_PHASE";
+    }
+  return NULL;
+}
+
+#define foreach_sctp4_input_next                 \
+  _ (DROP, "error-drop")                         \
+  _ (RCV_PHASE, "sctp4-rcv")                    \
+  _ (LISTEN_PHASE, "sctp4-listen")              \
+  _ (ESTABLISHED_PHASE, "sctp4-established")            \
+  _ (SHUTDOWN_PHASE, "sctp4-shutdown") \
+  _ (PUNT_PHASE, "ip4-punt")
+
+
+#define foreach_sctp6_input_next                 \
+  _ (DROP, "error-drop")                         \
+  _ (RCV_PHASE, "sctp6-rcv")                    \
+  _ (LISTEN_PHASE, "sctp6-listen")              \
+  _ (ESTABLISHED_PHASE, "sctp6-established")            \
+  _ (SHUTDOWN_PHASE, "sctp6-shutdown")         \
+  _ (PUNT_PHASE, "ip6-punt")
+
+static u8
+sctp_lookup_is_valid (transport_connection_t * t_conn,
+                     sctp_header_t * sctp_hdr)
+{
+  sctp_connection_t *sctp_conn = sctp_get_connection_from_transport (t_conn);
+
+  if (!sctp_conn)
+    return 1;
+
+  u8 is_valid = (t_conn->lcl_port == sctp_hdr->dst_port
+                && (sctp_conn->state == SCTP_STATE_CLOSED
+                    || t_conn->rmt_port == sctp_hdr->src_port));
+
+  return is_valid;
+}
+
+/**
+ * Lookup transport connection
+ *
+ * First consult the session-layer lookup tables; if that misses (e.g. the
+ * packet arrived on a secondary, multi-homed address), fall back to matching
+ * the addresses against the sub-connections of the (currently single)
+ * association on this thread.
+ */
+static sctp_connection_t *
+sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index,
+                       u8 is_ip4)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  sctp_header_t *sctp_hdr;
+  transport_connection_t *tconn;
+  sctp_connection_t *sctp_conn;
+  u8 is_filtered, i;
+  if (is_ip4)
+    {
+      ip4_header_t *ip4_hdr;
+      ip4_hdr = vlib_buffer_get_current (b);
+      sctp_hdr = ip4_next_header (ip4_hdr);
+      tconn = session_lookup_connection_wt4 (fib_index,
+                                            &ip4_hdr->dst_address,
+                                            &ip4_hdr->src_address,
+                                            sctp_hdr->dst_port,
+                                            sctp_hdr->src_port,
+                                            TRANSPORT_PROTO_SCTP,
+                                            thread_index, &is_filtered);
+      if (tconn == 0)          /* Not primary connection */
+       {
+         for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+           {
+             if ((tm->connections[thread_index]->sub_conn[i].
+                  connection.lcl_ip.ip4.as_u32 ==
+                  ip4_hdr->dst_address.as_u32)
+                 && (tm->connections[thread_index]->sub_conn[i].
+                     connection.rmt_ip.ip4.as_u32 ==
+                     ip4_hdr->src_address.as_u32))
+               {
+                 tconn =
+                   &tm->connections[thread_index]->sub_conn[i].connection;
+                 break;
+               }
+           }
+       }
+      ASSERT (tconn != 0);
+      ASSERT (sctp_lookup_is_valid (tconn, sctp_hdr));
+    }
+  else
+    {
+      ip6_header_t *ip6_hdr;
+      ip6_hdr = vlib_buffer_get_current (b);
+      sctp_hdr = ip6_next_header (ip6_hdr);
+      tconn = session_lookup_connection_wt6 (fib_index,
+                                            &ip6_hdr->dst_address,
+                                            &ip6_hdr->src_address,
+                                            sctp_hdr->dst_port,
+                                            sctp_hdr->src_port,
+                                            TRANSPORT_PROTO_SCTP,
+                                            thread_index, &is_filtered);
+      if (tconn == 0)          /* Not primary connection */
+       {
+         for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+           {
+             if ((tm->connections[thread_index]->sub_conn[i].
+                  connection.lcl_ip.ip6.as_u64[0] ==
+                  ip6_hdr->dst_address.as_u64[0]
+                  && tm->connections[thread_index]->sub_conn[i].
+                  connection.lcl_ip.ip6.as_u64[1] ==
+                  ip6_hdr->dst_address.as_u64[1])
+                 && (tm->connections[thread_index]->sub_conn[i].
+                     connection.rmt_ip.ip6.as_u64[0] ==
+                     ip6_hdr->src_address.as_u64[0]
+                     && tm->connections[thread_index]->
+                     sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
+                     ip6_hdr->src_address.as_u64[1]))
+               {
+                 tconn =
+                   &tm->connections[thread_index]->sub_conn[i].connection;
+                 break;
+               }
+           }
+       }
+      ASSERT (tconn != 0);
+      ASSERT (sctp_lookup_is_valid (tconn, sctp_hdr));
+    }
+  sctp_conn = sctp_get_connection_from_transport (tconn);
+  return sctp_conn;
+}
+
+typedef struct
+{
+  sctp_header_t sctp_header;
+  sctp_connection_t sctp_connection;
+} sctp_rx_trace_t;
+
+#define sctp_next_output(is_ip4) (is_ip4 ? SCTP_NEXT_SCTP4_OUTPUT          \
+                                        : SCTP_NEXT_SCTP6_OUTPUT)
+
+
+void
+sctp_set_rx_trace_data (sctp_rx_trace_t * rx_trace,
+                       sctp_connection_t * sctp_conn,
+                       sctp_header_t * sctp_hdr, vlib_buffer_t * b0,
+                       u8 is_ip4)
+{
+  if (sctp_conn)
+    {
+      clib_memcpy (&rx_trace->sctp_connection, sctp_conn,
+                  sizeof (rx_trace->sctp_connection));
+    }
+  else
+    {
+      sctp_hdr = sctp_buffer_hdr (b0);
+    }
+  clib_memcpy (&rx_trace->sctp_header, sctp_hdr,
+              sizeof (rx_trace->sctp_header));
+}
+
+always_inline u16
+sctp_calculate_implied_length (ip4_header_t * ip4_hdr, ip6_header_t * ip6_hdr,
+                              int is_ip4)
+{
+  u16 sctp_implied_packet_length = 0;
+
+  if (is_ip4)
+    sctp_implied_packet_length =
+      clib_net_to_host_u16 (ip4_hdr->length) - ip4_header_bytes (ip4_hdr);
+  else
+    /* payload_length already excludes the fixed IPv6 header */
+    sctp_implied_packet_length =
+      clib_net_to_host_u16 (ip6_hdr->payload_length);
+
+  return sctp_implied_packet_length;
+}
+
+always_inline u8
+sctp_is_bundling (u16 sctp_implied_length,
+                 sctp_chunks_common_hdr_t * sctp_common_hdr)
+{
+  if (sctp_implied_length !=
+      sizeof (sctp_header_t) + vnet_sctp_get_chunk_length (sctp_common_hdr))
+    return 1;
+  return 0;
+}
+
+always_inline u16
+sctp_handle_init (sctp_header_t * sctp_hdr,
+                 sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                 sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                 u16 sctp_implied_length)
+{
+  sctp_init_chunk_t *init_chunk = (sctp_init_chunk_t *) (sctp_hdr);
+  ip4_address_t ip4_addr;
+  u8 ip4_addr_set = 0;
+  ip6_address_t ip6_addr;
+  u8 ip6_addr_set = 0;
+  char hostname[FQDN_MAX_LENGTH];
+
+  /* Check the current state of the connection
+   *
+   * The logic required by the RFC4960 Section 5.2.2 is already taken care of
+   * in the code below and by the "sctp_prepare_initack_chunk" function.
+   * However, for debugging purposes it is nice to have a message printed out
+   * for these corner-case scenarios.
+   */
+  if (sctp_conn->state != SCTP_STATE_CLOSED)
+    {                          /* UNEXPECTED scenario */
+      switch (sctp_conn->state)
+       {
+       case SCTP_STATE_COOKIE_WAIT:    /* TODO */
+         SCTP_ADV_DBG ("Received INIT chunk while in COOKIE_WAIT state");
+         break;
+       case SCTP_STATE_COOKIE_ECHOED:  /* TODO */
+         SCTP_ADV_DBG ("Received INIT chunk while in COOKIE_ECHOED state");
+         break;
+       }
+    }
+
+  if (sctp_hdr->verification_tag != 0x0)
+    return SCTP_ERROR_INVALID_TAG_FOR_INIT;
+
+  /*
+   * It is not possible to bundle any other CHUNK with the INIT chunk
+   */
+  if (sctp_is_bundling (sctp_implied_length, &init_chunk->chunk_hdr))
+    return SCTP_ERROR_BUNDLING_VIOLATION;
+
+  /* Save the INITIATE_TAG of the remote peer for this connection:
+   * it MUST be used for the VERIFICATION_TAG parameter in the SCTP HEADER */
+  sctp_conn->remote_tag = init_chunk->initiate_tag;
+  sctp_conn->snd_opts.a_rwnd = clib_net_to_host_u32 (init_chunk->a_rwnd);
+
+  /*
+   * If the length specified in the INIT message is bigger than the size in bytes of our structure it means that
+   * optional parameters have been sent with the INIT chunk and we need to parse them.
+   */
+  u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr);
+  if (length > sizeof (sctp_init_chunk_t))
+    {
+      /* There are optional parameters in the INIT chunk */
+      u16 pointer_offset = sizeof (sctp_init_chunk_t);
+      while (pointer_offset < length)
+       {
+         sctp_opt_params_hdr_t *opt_params_hdr =
+           (sctp_opt_params_hdr_t *) ((u8 *) init_chunk + pointer_offset);
+
+         switch (clib_net_to_host_u16 (opt_params_hdr->type))
+           {
+           case SCTP_IPV4_ADDRESS_TYPE:
+             {
+               sctp_ipv4_addr_param_t *ipv4 =
+                 (sctp_ipv4_addr_param_t *) opt_params_hdr;
+               ip4_addr = &ip4_addr_storage;
+               clib_memcpy (ip4_addr, &ipv4->address,
+                            sizeof (ip4_address_t));
+
+               sctp_sub_connection_add_ip4 (vlib_get_thread_index (), ipv4);
+
+               break;
+             }
+           case SCTP_IPV6_ADDRESS_TYPE:
+             {
+               sctp_ipv6_addr_param_t *ipv6 =
+                 (sctp_ipv6_addr_param_t *) opt_params_hdr;
+               ip6_addr = &ip6_addr_storage;
+               clib_memcpy (ip6_addr, &ipv6->address,
+                            sizeof (ip6_address_t));
+
+               sctp_sub_connection_add_ip6 (vlib_get_thread_index (), ipv6);
+
+               break;
+             }
+           case SCTP_COOKIE_PRESERVATIVE_TYPE:
+             {
+               sctp_cookie_preservative_param_t *cookie_pres =
+                 (sctp_cookie_preservative_param_t *) opt_params_hdr;
+               sctp_conn->life_span_inc = cookie_pres->life_span_inc;
+               break;
+             }
+           case SCTP_HOSTNAME_ADDRESS_TYPE:
+             {
+               sctp_hostname_param_t *hostname_addr =
+                 (sctp_hostname_param_t *) opt_params_hdr;
+               clib_memcpy (hostname, hostname_addr->hostname,
+                            FQDN_MAX_LENGTH);
+               break;
+             }
+           case SCTP_SUPPORTED_ADDRESS_TYPES:
+             {
+               /* TODO */
+               break;
+             }
+           }
+         u16 increment = clib_net_to_host_u16 (opt_params_hdr->length);
+         /* A zero-length parameter would make this loop spin forever */
+         if (increment == 0)
+           return SCTP_ERROR_INVALID_TAG;
+         pointer_offset += increment;
+       }
+    }
+
+  /* Reuse buffer to make init-ack and send */
+  sctp_prepare_initack_chunk (sctp_conn, b0, ip4_addr, ip6_addr);
+  return SCTP_ERROR_NONE;
+}
+
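+/* Sanity-check a received INIT_ACK chunk: the peer must echo our locally
+ * generated verification tag and the chunk must not be bundled.
+ */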
+always_inline u16
+sctp_is_valid_init_ack (sctp_header_t * sctp_hdr,
+                       sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                       sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                       u16 sctp_implied_length)
+{
+  sctp_init_ack_chunk_t *init_ack_chunk =
+    (sctp_init_ack_chunk_t *) (sctp_hdr);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != init_ack_chunk->sctp_hdr.verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  /*
+   * It is not possible to bundle any other CHUNK with the INIT_ACK chunk
+   */
+  if (sctp_is_bundling (sctp_implied_length, &init_ack_chunk->chunk_hdr))
+    return SCTP_ERROR_BUNDLING_VIOLATION;
+
+  return SCTP_ERROR_NONE;
+}
+
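+/* Handle a received INIT_ACK chunk: store the peer's initiate tag and
+ * advertised receiver window, parse the optional parameters (addresses,
+ * STATE_COOKIE, ...), reply with a COOKIE_ECHO and start the T1_COOKIE timer.
+ */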
+always_inline u16
+sctp_handle_init_ack (sctp_header_t * sctp_hdr,
+                     sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                     sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                     u16 sctp_implied_length)
+{
+  sctp_init_ack_chunk_t *init_ack_chunk =
+    (sctp_init_ack_chunk_t *) (sctp_hdr);
+  ip4_address_t ip4_addr;
+  ip6_address_t ip6_addr;
+  sctp_state_cookie_param_t state_cookie;
+
+  char hostname[FQDN_MAX_LENGTH];
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != init_ack_chunk->sctp_hdr.verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  /*
+   * It is not possible to bundle any other CHUNK with the INIT_ACK chunk
+   */
+  if (sctp_is_bundling (sctp_implied_length, &init_ack_chunk->chunk_hdr))
+    return SCTP_ERROR_BUNDLING_VIOLATION;
+
+  /* remote_tag to be placed in the VERIFICATION_TAG field of the COOKIE_ECHO chunk */
+  sctp_conn->remote_tag = init_ack_chunk->initiate_tag;
+  sctp_conn->snd_opts.a_rwnd = clib_net_to_host_u32 (init_ack_chunk->a_rwnd);
+
+  u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr);
+
+  if (length > sizeof (sctp_init_ack_chunk_t))
+    /*
+     * There are optional parameters in the INIT ACK chunk
+     */
+    {
+      u16 pointer_offset = sizeof (sctp_init_ack_chunk_t);
+
+      while (pointer_offset < length)
+       {
+         sctp_opt_params_hdr_t *opt_params_hdr =
+           (sctp_opt_params_hdr_t *) ((char *) init_ack_chunk +
+                                      pointer_offset);
+
+         switch (clib_net_to_host_u16 (opt_params_hdr->type))
+           {
+           case SCTP_IPV4_ADDRESS_TYPE:
+             {
+               sctp_ipv4_addr_param_t *ipv4 =
+                 (sctp_ipv4_addr_param_t *) opt_params_hdr;
+               clib_memcpy (&ip4_addr, &ipv4->address,
+                            sizeof (ip4_address_t));
+
+               sctp_sub_connection_add_ip4 (vlib_get_thread_index (), ipv4);
+
+               break;
+             }
+           case SCTP_IPV6_ADDRESS_TYPE:
+             {
+               sctp_ipv6_addr_param_t *ipv6 =
+                 (sctp_ipv6_addr_param_t *) opt_params_hdr;
+               clib_memcpy (&ip6_addr, &ipv6->address,
+                            sizeof (ip6_address_t));
+
+               sctp_sub_connection_add_ip6 (vlib_get_thread_index (), ipv6);
+
+               break;
+             }
+           case SCTP_STATE_COOKIE_TYPE:
+             {
+               sctp_state_cookie_param_t *state_cookie_param =
+                 (sctp_state_cookie_param_t *) opt_params_hdr;
+
+               clib_memcpy (&state_cookie, state_cookie_param,
+                            sizeof (sctp_state_cookie_param_t));
+               break;
+             }
+           case SCTP_HOSTNAME_ADDRESS_TYPE:
+             {
+               sctp_hostname_param_t *hostname_addr =
+                 (sctp_hostname_param_t *) opt_params_hdr;
+               clib_memcpy (hostname, hostname_addr->hostname,
+                            FQDN_MAX_LENGTH);
+               break;
+             }
+           case SCTP_UNRECOGNIZED_TYPE:
+             {
+               break;
+             }
+           }
+         u16 increment = clib_net_to_host_u16 (opt_params_hdr->length);
+         /* A zero-length parameter would make this loop spin forever */
+         if (increment == 0)
+           {
+             return SCTP_ERROR_INVALID_TAG;
+           }
+         pointer_offset += increment;
+       }
+    }
+
+  sctp_prepare_cookie_echo_chunk (sctp_conn, b0, &state_cookie);
+
+  /* Start the T1_COOKIE timer */
+  sctp_timer_set (sctp_conn, sctp_pick_conn_idx_on_chunk (COOKIE_ECHO),
+                 SCTP_TIMER_T1_COOKIE, SCTP_RTO_INIT);
+
+  return SCTP_ERROR_NONE;
+}
+
+/** Enqueue data for delivery to application */
+always_inline int
+sctp_session_enqueue_data (sctp_connection_t * tc, vlib_buffer_t * b,
+                          u16 data_len, u8 conn_idx)
+{
+  int written, error = SCTP_ERROR_ENQUEUED;
+
+  written =
+    session_enqueue_stream_connection (&tc->sub_conn[conn_idx].connection, b,
+                                      0, 1 /* queue event */ , 1);
+
+  /* Update rcv_nxt */
+  if (PREDICT_TRUE (written == data_len))
+    {
+      tc->rcv_nxt += written;
+
+      SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] == DATA_LEN [%d]",
+                   tc->sub_conn[conn_idx].connection.c_index,
+                   written, data_len);
+    }
+  /* If more data written than expected, account for out-of-order bytes. */
+  else if (written > data_len)
+    {
+      tc->rcv_nxt += written;
+
+      SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] > DATA_LEN [%d]",
+                   tc->sub_conn[conn_idx].connection.c_index,
+                   written, data_len);
+    }
+  else if (written > 0)
+    {
+      /* We've written something but FIFO is probably full now */
+      tc->rcv_nxt += written;
+
+      error = SCTP_ERROR_PARTIALLY_ENQUEUED;
+
+      SCTP_ADV_DBG
+       ("CONN = %u, WRITTEN [%u] > 0 (SCTP_ERROR_PARTIALLY_ENQUEUED)",
+        tc->sub_conn[conn_idx].connection.c_index, written);
+    }
+  else
+    {
+      SCTP_ADV_DBG ("CONN = %u, WRITTEN == 0 (SCTP_ERROR_FIFO_FULL)",
+                   tc->sub_conn[conn_idx].connection.c_index);
+
+      return SCTP_ERROR_FIFO_FULL;
+    }
+
+  return error;
+}
+
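+/* Handle a received DATA chunk: validate the verification tag, save SID/SSN/TSN
+ * in the buffer metadata, enqueue the payload towards the application and
+ * prepare a SACK in reply.
+ */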
+always_inline u16
+sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk,
+                 sctp_connection_t * sctp_conn, vlib_buffer_t * b,
+                 u16 * next0)
+{
+  u32 error = 0, n_data_bytes;
+  u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != sctp_data_chunk->sctp_hdr.verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  vnet_buffer (b)->sctp.sid = sctp_data_chunk->stream_id;
+  vnet_buffer (b)->sctp.ssn = sctp_data_chunk->stream_seq;
+  vnet_buffer (b)->sctp.tsn = sctp_data_chunk->tsn;
+
+  vlib_buffer_advance (b, vnet_buffer (b)->sctp.data_offset);
+  n_data_bytes = vnet_buffer (b)->sctp.data_len;
+  ASSERT (n_data_bytes);
+
+  SCTP_ADV_DBG ("POINTER_WITH_DATA = %p", b->data);
+
+  /* In order data, enqueue. Fifo figures out by itself if any out-of-order
+   * segments can be enqueued after fifo tail offset changes. */
+  error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx);
+
+  sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, SCTP_RTO_INIT);
+
+  *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4);
+  sctp_prepare_sack_chunk (sctp_conn, b);
+
+  return error;
+}
+
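+/* Handle a received COOKIE_ECHO chunk: reply with a COOKIE_ACK, move the
+ * connection to ESTABLISHED and notify the session layer of the accept.
+ */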
+always_inline u16
+sctp_handle_cookie_echo (sctp_header_t * sctp_hdr,
+                        sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                        sctp_connection_t * sctp_conn, vlib_buffer_t * b0)
+{
+
+  /* Build TCB */
+  u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ECHO);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != sctp_hdr->verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  sctp_prepare_cookie_ack_chunk (sctp_conn, b0);
+
+  /* Change state */
+  sctp_conn->state = SCTP_STATE_ESTABLISHED;
+
+  stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection);
+
+  return SCTP_ERROR_NONE;
+
+}
+
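+/* Handle a received COOKIE_ACK chunk: stop the T1_COOKIE timer, move the
+ * connection to ESTABLISHED and notify the session layer of the accept.
+ */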
+always_inline u16
+sctp_handle_cookie_ack (sctp_header_t * sctp_hdr,
+                       sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                       sctp_connection_t * sctp_conn, vlib_buffer_t * b0)
+{
+
+  /* Stop T1_COOKIE timer */
+  u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != sctp_hdr->verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T1_COOKIE);
+  /* Change state */
+  sctp_conn->state = SCTP_STATE_ESTABLISHED;
+
+  stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection);
+
+  sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T3_RXTX, SCTP_RTO_INIT);
+
+  return SCTP_ERROR_NONE;
+
+}
+
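+/* Per-packet processing for connections still in the handshake phase
+ * (half-open pool); currently only INIT_ACK chunks are expected here.
+ */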
+always_inline uword
+sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        vlib_frame_t * from_frame, int is_ip4)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 my_thread_index = vm->thread_index;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         sctp_header_t *sctp_hdr = 0;
+         sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0;
+         ip4_header_t *ip4_hdr = 0;
+         ip6_header_t *ip6_hdr = 0;
+         sctp_connection_t *sctp_conn, *new_sctp_conn;
+         u16 sctp_implied_length = 0;
+         u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT;
+         u8 idx;
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+
+         /* If we are in SCTP_STATE_COOKIE_WAIT then the connection
+          * will come from the half-open connections pool.
+          */
+         sctp_conn =
+           sctp_half_open_connection_get (vnet_buffer (b0)->
+                                          sctp.connection_index);
+
+         if (PREDICT_FALSE (sctp_conn == 0))
+           {
+             SCTP_ADV_DBG
+               ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION");
+             error0 = SCTP_ERROR_INVALID_CONNECTION;
+             goto drop;
+           }
+         if (is_ip4)
+           {
+             ip4_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip4_next_header (ip4_hdr);
+           }
+         else
+           {
+             ip6_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip6_next_header (ip6_hdr);
+           }
+         idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
+
+         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+
+         transport_connection_t *t_conn =
+           &sctp_conn->sub_conn[idx].connection;
+
+         t_conn->lcl_port = sctp_hdr->dst_port;
+         t_conn->rmt_port = sctp_hdr->src_port;
+         t_conn->is_ip4 = is_ip4;
+
+         if (is_ip4)
+           {
+             t_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32;
+             t_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32;
+           }
+         else
+           {
+             clib_memcpy (&t_conn->lcl_ip.ip6, &ip6_hdr->dst_address,
+                          sizeof (ip6_address_t));
+             clib_memcpy (&t_conn->rmt_ip.ip6, &ip6_hdr->src_address,
+                          sizeof (ip6_address_t));
+           }
+
+         sctp_chunk_hdr =
+           (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr);
+
+         sctp_implied_length =
+           sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4);
+
+         u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr);
+
+         switch (chunk_type)
+           {
+           case INIT_ACK:
+             error0 =
+               sctp_is_valid_init_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn,
+                                       b0, sctp_implied_length);
+
+             if (error0 == SCTP_ERROR_NONE)
+               {
+                 pool_get (tm->connections[my_thread_index], new_sctp_conn);
+                 clib_memcpy (new_sctp_conn, sctp_conn,
+                              sizeof (*new_sctp_conn));
+                 new_sctp_conn->sub_conn[idx].c_c_index =
+                   new_sctp_conn - tm->connections[my_thread_index];
+                 new_sctp_conn->sub_conn[idx].c_thread_index =
+                   my_thread_index;
+                 new_sctp_conn->sub_conn[idx].parent = new_sctp_conn;
+
+                 if (sctp_half_open_connection_cleanup (sctp_conn))
+                   {
+                     SCTP_DBG
+                       ("Cannot cleanup half-open connection; not the owning thread");
+                   }
+
+                 sctp_connection_timers_init (new_sctp_conn);
+
+                 error0 =
+                   sctp_handle_init_ack (sctp_hdr, sctp_chunk_hdr,
+                                         new_sctp_conn, b0,
+                                         sctp_implied_length);
+
+                 sctp_connection_init_vars (new_sctp_conn);
+
+                 if (session_stream_connect_notify
+                     (&new_sctp_conn->sub_conn[idx].connection, 0))
+                   {
+                     SCTP_DBG
+                       ("conn_index = %u: session_stream_connect_notify error; cleaning up connection",
+                        new_sctp_conn->sub_conn[idx].connection.c_index);
+                     sctp_connection_cleanup (new_sctp_conn);
+                     goto drop;
+                   }
+               }
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+             /* All UNEXPECTED scenarios (wrong chunk received per state-machine)
+              * are handled by the input-dispatcher function using a table lookup,
+              * hence we should never get to the "default" case below.
+              */
+           default:
+             error0 = SCTP_ERROR_UNKOWN_CHUNK;
+             next0 = SCTP_NEXT_DROP;
+             goto drop;
+           }
+
+         if (error0 != SCTP_ERROR_NONE)
+           {
+             clib_warning ("error while parsing chunk");
+             sctp_connection_cleanup (sctp_conn);
+             next0 = SCTP_NEXT_DROP;
+             goto drop;
+           }
+
+       drop:
+         b0->error = node->errors[error0];
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             sctp_rx_trace_t *t0 =
+               vlib_add_trace (vm, node, b0, sizeof (*t0));
+             sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4);
+           }
+
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  return from_frame->n_vectors;
+}
+
+static uword
+sctp4_rcv_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+  return sctp46_rcv_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+sctp6_rcv_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                vlib_frame_t * from_frame)
+{
+  return sctp46_rcv_phase_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+u8 *
+format_sctp_rx_trace_short (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  sctp_rx_trace_t *t = va_arg (*args, sctp_rx_trace_t *);
+
+  s = format (s, "%d -> %d (%U)",
+             clib_net_to_host_u16 (t->sctp_header.src_port),
+             clib_net_to_host_u16 (t->sctp_header.dst_port),
+             format_sctp_state, t->sctp_connection.state);
+
+  return s;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp4_rcv_phase_node) =
+{
+  .function = sctp4_rcv_phase,
+  .name = "sctp4-rcv",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_RCV_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_RCV_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp4_rcv_phase_node, sctp4_rcv_phase);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp6_init_phase_node) =
+{
+  .function = sctp6_rcv_phase,
+  .name = "sctp6-rcv",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_RCV_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_RCV_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp6_init_phase_node, sctp6_rcv_phase);
+
+vlib_node_registration_t sctp4_shutdown_phase_node;
+vlib_node_registration_t sctp6_shutdown_phase_node;
+
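+/* Handle a received SHUTDOWN chunk: in ESTABLISHED, move to SHUTDOWN_RECEIVED
+ * once no DATA chunks are outstanding; in SHUTDOWN_SENT, reply with a
+ * SHUTDOWN_ACK.
+ */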
+always_inline u16
+sctp_handle_shutdown (sctp_header_t * sctp_hdr,
+                     sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                     sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                     u16 sctp_implied_length)
+{
+  sctp_shutdown_association_chunk_t *shutdown_chunk =
+    (sctp_shutdown_association_chunk_t *) (sctp_hdr);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != sctp_hdr->verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  /*
+   * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk
+   */
+  if (sctp_is_bundling (sctp_implied_length, &shutdown_chunk->chunk_hdr))
+    return SCTP_ERROR_BUNDLING_VIOLATION;
+
+  switch (sctp_conn->state)
+    {
+    case SCTP_STATE_ESTABLISHED:
+      if (sctp_check_outstanding_data_chunks (sctp_conn) == 0)
+       sctp_conn->state = SCTP_STATE_SHUTDOWN_RECEIVED;
+      break;
+
+    case SCTP_STATE_SHUTDOWN_SENT:
+      sctp_send_shutdown_ack (sctp_conn);
+      break;
+    }
+
+  return SCTP_ERROR_NONE;
+}
+
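+/* Handle a received SHUTDOWN_ACK chunk: stop the T2_SHUTDOWN timer and reply
+ * with a SHUTDOWN_COMPLETE.
+ */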
+always_inline u16
+sctp_handle_shutdown_ack (sctp_header_t * sctp_hdr,
+                         sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                         sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                         u16 sctp_implied_length)
+{
+  sctp_shutdown_ack_chunk_t *shutdown_ack_chunk =
+    (sctp_shutdown_ack_chunk_t *) (sctp_hdr);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != sctp_hdr->verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  /*
+   * It is not possible to bundle any other CHUNK with the SHUTDOWN_ACK chunk
+   */
+  if (sctp_is_bundling (sctp_implied_length, &shutdown_ack_chunk->chunk_hdr))
+    return SCTP_ERROR_BUNDLING_VIOLATION;
+
+  /* Whether we are in SCTP_STATE_SHUTDOWN_SENT or SCTP_STATE_SHUTDOWN_ACK_SENT
+   * the reception of a SHUTDOWN_ACK chunk leads to the same actions:
+   * - STOP T2_SHUTDOWN timer
+   * - SEND SHUTDOWN_COMPLETE chunk
+   */
+  sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX,
+                   SCTP_TIMER_T2_SHUTDOWN);
+  sctp_send_shutdown_complete (sctp_conn);
+
+  return SCTP_ERROR_NONE;
+}
+
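+/* Handle a received SHUTDOWN_COMPLETE chunk: stop the T2_SHUTDOWN timer, move
+ * the connection to CLOSED and notify the session layer of the disconnect.
+ */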
+always_inline u16
+sctp_handle_shutdown_complete (sctp_header_t * sctp_hdr,
+                              sctp_chunks_common_hdr_t * sctp_chunk_hdr,
+                              sctp_connection_t * sctp_conn,
+                              vlib_buffer_t * b0, u16 sctp_implied_length)
+{
+  sctp_shutdown_complete_chunk_t *shutdown_complete =
+    (sctp_shutdown_complete_chunk_t *) (sctp_hdr);
+
+  /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */
+  if (sctp_conn->local_tag != sctp_hdr->verification_tag)
+    {
+      return SCTP_ERROR_INVALID_TAG;
+    }
+
+  /*
+   * It is not possible to bundle any other CHUNK with the SHUTDOWN_COMPLETE chunk
+   */
+  if (sctp_is_bundling (sctp_implied_length, &shutdown_complete->chunk_hdr))
+    return SCTP_ERROR_BUNDLING_VIOLATION;
+
+  sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX,
+                   SCTP_TIMER_T2_SHUTDOWN);
+
+  sctp_conn->state = SCTP_STATE_CLOSED;
+
+  stream_session_disconnect_notify (&sctp_conn->sub_conn
+                                   [MAIN_SCTP_SUB_CONN_IDX].connection);
+
+  return SCTP_ERROR_NONE;
+}
+
+always_inline uword
+sctp46_shutdown_phase_inline (vlib_main_t * vm,
+                             vlib_node_runtime_t * node,
+                             vlib_frame_t * from_frame, int is_ip4)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 my_thread_index = vm->thread_index;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         sctp_rx_trace_t *sctp_trace;
+         sctp_header_t *sctp_hdr = 0;
+         sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0;
+         ip4_header_t *ip4_hdr = 0;
+         ip6_header_t *ip6_hdr = 0;
+         sctp_connection_t *sctp_conn;
+         u16 sctp_implied_length = 0;
+         u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT;
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         sctp_conn =
+           sctp_connection_get (vnet_buffer (b0)->sctp.connection_index,
+                                my_thread_index);
+
+         if (PREDICT_FALSE (sctp_conn == 0))
+           {
+             SCTP_DBG
+               ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION");
+             error0 = SCTP_ERROR_INVALID_CONNECTION;
+             goto drop;
+           }
+
+         if (is_ip4)
+           {
+             ip4_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip4_next_header (ip4_hdr);
+           }
+         else
+           {
+             ip6_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip6_next_header (ip6_hdr);
+           }
+
+         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+         sctp_chunk_hdr = &full_hdr->common_hdr;
+
+         sctp_implied_length =
+           sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4);
+
+         switch (vnet_sctp_get_chunk_type (sctp_chunk_hdr))
+           {
+           case SHUTDOWN:
+             error0 =
+               sctp_handle_shutdown (sctp_hdr, sctp_chunk_hdr, sctp_conn, b0,
+                                     sctp_implied_length);
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+           case SHUTDOWN_ACK:
+             error0 =
+               sctp_handle_shutdown_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn,
+                                         b0, sctp_implied_length);
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+           case SHUTDOWN_COMPLETE:
+             error0 =
+               sctp_handle_shutdown_complete (sctp_hdr, sctp_chunk_hdr,
+                                              sctp_conn, b0,
+                                              sctp_implied_length);
+
+             sctp_connection_cleanup (sctp_conn);
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+             /*
+              * DATA chunks can still be transmitted/received in the SHUTDOWN-PENDING
+              * and SHUTDOWN-SENT states (as per RFC4960 Section 6)
+              */
+           case DATA:
+             error0 =
+               sctp_handle_data ((sctp_payload_data_chunk_t *) sctp_hdr,
+                                 sctp_conn, b0, &next0);
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+             /* All UNEXPECTED scenarios (wrong chunk received per state-machine)
+              * are handled by the input-dispatcher function using a table lookup,
+              * hence we should never get to the "default" case below.
+              */
+           default:
+             error0 = SCTP_ERROR_UNKOWN_CHUNK;
+             next0 = SCTP_NEXT_DROP;
+             goto drop;
+           }
+
+         if (error0 != SCTP_ERROR_NONE)
+           {
+             clib_warning ("error while parsing chunk");
+             sctp_connection_cleanup (sctp_conn);
+             next0 = SCTP_NEXT_DROP;
+             goto drop;
+           }
+
+       drop:
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             sctp_trace =
+               vlib_add_trace (vm, node, b0, sizeof (*sctp_trace));
+             clib_memcpy (&sctp_trace->sctp_header, sctp_hdr,
+                          sizeof (sctp_trace->sctp_header));
+             clib_memcpy (&sctp_trace->sctp_connection, sctp_conn,
+                          sizeof (sctp_trace->sctp_connection));
+           }
+
+         b0->error = node->errors[error0];
+
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+
+}
+
+static uword
+sctp4_shutdown_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                     vlib_frame_t * from_frame)
+{
+  return sctp46_shutdown_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+sctp6_shutdown_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                     vlib_frame_t * from_frame)
+{
+  return sctp46_shutdown_phase_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp4_shutdown_phase_node) =
+{
+  .function = sctp4_shutdown_phase,
+  .name = "sctp4-shutdown",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_SHUTDOWN_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_SHUTDOWN_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp4_shutdown_phase_node,
+                             sctp4_shutdown_phase);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp6_shutdown_phase_node) =
+{
+  .function = sctp6_shutdown_phase,
+  .name = "sctp6-shutdown",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_SHUTDOWN_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_SHUTDOWN_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp6_shutdown_phase_node,
+                             sctp6_shutdown_phase);
+
+vlib_node_registration_t sctp4_listen_phase_node;
+vlib_node_registration_t sctp6_listen_phase_node;
+
+vlib_node_registration_t sctp4_established_phase_node;
+vlib_node_registration_t sctp6_established_phase_node;
+
+always_inline u16
+sctp_handle_sack (sctp_selective_ack_chunk_t * sack_chunk,
+                 sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                 u16 * next0)
+{
+  *next0 =
+    sctp_next_output (sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].
+                     connection.is_ip4);
+
+  return SCTP_ERROR_NONE;
+}
+
+always_inline u16
+sctp_handle_heartbeat (sctp_hb_req_chunk_t * sctp_hb_chunk,
+                      sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                      u16 * next0)
+{
+  return SCTP_ERROR_NONE;
+}
+
+always_inline u16
+sctp_handle_heartbeat_ack (sctp_hb_ack_chunk_t * sctp_hb_ack_chunk,
+                          sctp_connection_t * sctp_conn, vlib_buffer_t * b0,
+                          u16 * next0)
+{
+  return SCTP_ERROR_NONE;
+}
+
+always_inline void
+sctp_node_inc_counter (vlib_main_t * vm, u32 sctp4_node, u32 sctp6_node,
+                      u8 is_ip4, u8 evt, u8 val)
+{
+  if (PREDICT_TRUE (!val))
+    return;
+
+  if (is_ip4)
+    vlib_node_increment_counter (vm, sctp4_node, evt, val);
+  else
+    vlib_node_increment_counter (vm, sctp6_node, evt, val);
+}
+
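+/* Per-packet processing for listening endpoints: an incoming INIT creates a
+ * child connection, triggers an INIT_ACK and notifies the session layer.
+ */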
+always_inline uword
+sctp46_listen_process_inline (vlib_main_t * vm,
+                             vlib_node_runtime_t * node,
+                             vlib_frame_t * from_frame, int is_ip4)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 my_thread_index = vm->thread_index;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         sctp_header_t *sctp_hdr = 0;
+         ip4_header_t *ip4_hdr;
+         ip6_header_t *ip6_hdr;
+         sctp_connection_t *child_conn;
+         sctp_connection_t *sctp_listener;
+         u16 next0 = SCTP_LISTEN_PHASE_N_NEXT, error0 = SCTP_ERROR_ENQUEUED;
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         sctp_listener =
+           sctp_listener_get (vnet_buffer (b0)->sctp.connection_index);
+
+         if (is_ip4)
+           {
+             ip4_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip4_next_header (ip4_hdr);
+           }
+         else
+           {
+             ip6_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip6_next_header (ip6_hdr);
+           }
+
+         child_conn =
+           sctp_lookup_connection (sctp_listener->sub_conn
+                                   [MAIN_SCTP_SUB_CONN_IDX].c_fib_index, b0,
+                                   my_thread_index, is_ip4);
+
+         if (PREDICT_FALSE (child_conn->state != SCTP_STATE_CLOSED))
+           {
+             SCTP_DBG
+               ("conn_index = %u: child_conn->state != SCTP_STATE_CLOSED.... STATE=%s",
+                child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].
+                connection.c_index,
+                sctp_state_to_string (child_conn->state));
+             error0 = SCTP_ERROR_CREATE_EXISTS;
+             goto drop;
+           }
+
+         /* Create child connection and reply with an INIT_ACK */
+         child_conn = sctp_connection_new (my_thread_index);
+         child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = child_conn;
+         child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_lcl_port =
+           sctp_hdr->dst_port;
+         child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_rmt_port =
+           sctp_hdr->src_port;
+         child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_is_ip4 = is_ip4;
+         child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto =
+           sctp_listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto;
+         child_conn->state = SCTP_STATE_CLOSED;
+
+         if (is_ip4)
+           {
+             child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_lcl_ip4.as_u32 =
+               ip4_hdr->dst_address.as_u32;
+             child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_rmt_ip4.as_u32 =
+               ip4_hdr->src_address.as_u32;
+           }
+         else
+           {
+             clib_memcpy (&child_conn->
+                          sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_lcl_ip6,
+                          &ip6_hdr->dst_address, sizeof (ip6_address_t));
+             clib_memcpy (&child_conn->
+                          sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_rmt_ip6,
+                          &ip6_hdr->src_address, sizeof (ip6_address_t));
+           }
+
+         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+         sctp_chunks_common_hdr_t *sctp_chunk_hdr = &full_hdr->common_hdr;
+
+         u8 chunk_type = vnet_sctp_get_chunk_type (sctp_chunk_hdr);
+         if (chunk_type != INIT)
+           {
+             SCTP_DBG
+               ("conn_index = %u: chunk_type != INIT... chunk_type=%s",
+                child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].
+                connection.c_index, sctp_chunk_to_string (chunk_type));
+
+             error0 = SCTP_ERROR_UNKOWN_CHUNK;
+             next0 = SCTP_NEXT_DROP;
+             goto drop;
+           }
+
+         u16 sctp_implied_length =
+           sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4);
+
+         switch (chunk_type)
+           {
+           case INIT:
+             sctp_connection_timers_init (child_conn);
+
+             error0 =
+               sctp_handle_init (sctp_hdr, sctp_chunk_hdr, child_conn, b0,
+                                 sctp_implied_length);
+
+             sctp_connection_init_vars (child_conn);
+
+             if (error0 == SCTP_ERROR_NONE)
+               {
+                 if (stream_session_accept
+                     (&child_conn->
+                      sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection,
+                      sctp_listener->
+                      sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_s_index, 0))
+                   {
+                     clib_warning ("session accept fail");
+                     sctp_connection_cleanup (child_conn);
+                     error0 = SCTP_ERROR_CREATE_SESSION_FAIL;
+                     goto drop;
+                   }
+               }
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+             /* A DATA chunk received whilst in the CLOSED state is an
+              * "Out of the Blue" packet and its handling needs special treatment
+              * as per RFC4960 Section 8.4
+              */
+           case DATA:
+             break;
+           }
+
+       drop:
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             sctp_rx_trace_t *t0 =
+               vlib_add_trace (vm, node, b0, sizeof (*t0));
+             clib_memcpy (&t0->sctp_header, sctp_hdr,
+                          sizeof (t0->sctp_header));
+             clib_memcpy (&t0->sctp_connection, sctp_listener,
+                          sizeof (t0->sctp_connection));
+           }
+
+         b0->error = node->errors[error0];
+
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, bi0, next0);
+       }
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+    }
+  return from_frame->n_vectors;
+}
+
+static uword
+sctp4_listen_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+  return sctp46_listen_process_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+sctp6_listen_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                   vlib_frame_t * from_frame)
+{
+  return sctp46_listen_process_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
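+/* Per-packet processing for fully looked-up connections: COOKIE_ECHO/ACK,
+ * SACK, HEARTBEAT/HEARTBEAT_ACK and DATA chunks are dispatched to their
+ * respective handlers above.
+ */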
+always_inline uword
+sctp46_established_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                                vlib_frame_t * from_frame, int is_ip4)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 my_thread_index = vm->thread_index, errors = 0;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         sctp_header_t *sctp_hdr = 0;
+         sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0;
+         ip4_header_t *ip4_hdr = 0;
+         ip6_header_t *ip6_hdr = 0;
+         sctp_connection_t *sctp_conn;
+         u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_ESTABLISHED_PHASE_N_NEXT;
+         u8 idx;
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         sctp_conn =
+           sctp_connection_get (vnet_buffer (b0)->sctp.connection_index,
+                                my_thread_index);
+
+         if (PREDICT_FALSE (sctp_conn == 0))
+           {
+             SCTP_DBG
+               ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION");
+             error0 = SCTP_ERROR_INVALID_CONNECTION;
+             goto done;
+           }
+         if (is_ip4)
+           {
+             ip4_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip4_next_header (ip4_hdr);
+           }
+         else
+           {
+             ip6_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip6_next_header (ip6_hdr);
+           }
+
+         idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
+
+         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+
+         transport_connection_t *t_conn =
+           &sctp_conn->sub_conn[idx].connection;
+
+         t_conn->lcl_port = sctp_hdr->dst_port;
+         t_conn->rmt_port = sctp_hdr->src_port;
+         t_conn->is_ip4 = is_ip4;
+
+         sctp_conn->sub_conn[idx].parent = sctp_conn;
+
+         if (is_ip4)
+           {
+             t_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32;
+             t_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32;
+           }
+         else
+           {
+             clib_memcpy (&t_conn->lcl_ip.ip6, &ip6_hdr->dst_address,
+                          sizeof (ip6_address_t));
+             clib_memcpy (&t_conn->rmt_ip.ip6, &ip6_hdr->src_address,
+                          sizeof (ip6_address_t));
+           }
+
+         sctp_chunk_hdr =
+           (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr);
+
+         u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr);
+
+         switch (chunk_type)
+           {
+           case COOKIE_ECHO:
+             error0 =
+               sctp_handle_cookie_echo (sctp_hdr, sctp_chunk_hdr, sctp_conn,
+                                        b0);
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+           case COOKIE_ACK:
+             error0 =
+               sctp_handle_cookie_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn,
+                                       b0);
+             next0 = sctp_next_output (is_ip4);
+             break;
+
+           case SACK:
+             error0 =
+               sctp_handle_sack ((sctp_selective_ack_chunk_t *) sctp_hdr,
+                                 sctp_conn, b0, &next0);
+             break;
+
+           case HEARTBEAT:
+             error0 =
+               sctp_handle_heartbeat ((sctp_hb_req_chunk_t *) sctp_hdr,
+                                      sctp_conn, b0, &next0);
+             break;
+
+           case HEARTBEAT_ACK:
+             error0 =
+               sctp_handle_heartbeat_ack ((sctp_hb_ack_chunk_t *) sctp_hdr,
+                                          sctp_conn, b0, &next0);
+             break;
+
+           case DATA:
+             error0 =
+               sctp_handle_data ((sctp_payload_data_chunk_t *) sctp_hdr,
+                                 sctp_conn, b0, &next0);
+             break;
+
+             /* All UNEXPECTED scenarios (wrong chunk received per state-machine)
+              * are handled by the input-dispatcher function using a table lookup,
+              * hence we should never get to the "default" case below.
+              */
+           default:
+             error0 = SCTP_ERROR_UNKOWN_CHUNK;
+             next0 = SCTP_NEXT_DROP;
+             goto done;
+           }
+
+       done:
+         b0->error = node->errors[error0];
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             sctp_rx_trace_t *t0 =
+               vlib_add_trace (vm, node, b0, sizeof (*t0));
+             sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4);
+           }
+
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  errors = session_manager_flush_enqueue_events (TRANSPORT_PROTO_SCTP,
+                                                my_thread_index);
+
+  sctp_node_inc_counter (vm, sctp4_established_phase_node.index,
+                        sctp6_established_phase_node.index, is_ip4,
+                        SCTP_ERROR_EVENT_FIFO_FULL, errors);
+  sctp_flush_frame_to_output (vm, my_thread_index, is_ip4);
+
+  return from_frame->n_vectors;
+}
+
+static uword
+sctp4_established_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        vlib_frame_t * from_frame)
+{
+  return sctp46_established_phase_inline (vm, node, from_frame,
+                                         1 /* is_ip4 */ );
+}
+
+static uword
+sctp6_established_phase (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        vlib_frame_t * from_frame)
+{
+  return sctp46_established_phase_inline (vm, node, from_frame,
+                                         0 /* is_ip4 */ );
+}
+
+u8 *
+format_sctp_rx_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  sctp_rx_trace_t *t = va_arg (*args, sctp_rx_trace_t *);
+  u32 indent = format_get_indent (s);
+
+  s = format (s, "%U\n%U%U",
+             format_sctp_header, &t->sctp_header, 128,
+             format_white_space, indent,
+             format_sctp_connection, &t->sctp_connection, 1);
+
+  return s;
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp4_listen_phase_node) =
+{
+  .function = sctp4_listen_phase,
+  .name = "sctp4-listen",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp4_listen_phase_node, sctp4_listen_phase);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp6_listen_phase_node) =
+{
+  .function = sctp6_listen_phase,
+  .name = "sctp6-listen",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp6_listen_phase_node, sctp6_listen_phase);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp4_established_phase_node) =
+{
+  .function = sctp4_established_phase,
+  .name = "sctp4-established",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_ESTABLISHED_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_ESTABLISHED_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp4_established_phase_node,
+                             sctp4_established_phase);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp6_established_phase_node) =
+{
+  .function = sctp6_established_phase,
+  .name = "sctp6-established",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n,
+    foreach_sctp_state_next
+#undef _
+  },
+  .format_trace = format_sctp_rx_trace_short,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp6_established_phase_node,
+                             sctp6_established_phase);
+
+/*
+ * This is the first function executed in the SCTP graph.
+ * It takes care of the initial message parsing and dispatches
+ * the packet to the appropriate specialized node.
+ */
+always_inline uword
+sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node,
+                        vlib_frame_t * from_frame, int is_ip4)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 my_thread_index = vm->thread_index;
+  u8 is_filtered;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  next_index = node->cached_next_index;
+  sctp_set_time_now (my_thread_index);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         int n_advance_bytes0, n_data_bytes0;
+         u32 bi0, fib_index0;
+         vlib_buffer_t *b0;
+         sctp_header_t *sctp_hdr = 0;
+         sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0;
+         sctp_connection_t *sctp_conn;
+         transport_connection_t *tconn;
+         ip4_header_t *ip4_hdr;
+         ip6_header_t *ip6_hdr;
+         u32 error0 = SCTP_ERROR_NO_LISTENER, next0 = SCTP_INPUT_NEXT_DROP;
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         vnet_buffer (b0)->tcp.flags = 0;
+         fib_index0 = vnet_buffer (b0)->ip.fib_index;
+
+         /* Checksum computed by ipx_local, no need to compute it again */
+
+         if (is_ip4)
+           {
+             ip4_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip4_next_header (ip4_hdr);
+
+             sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+             sctp_chunk_hdr = &full_hdr->common_hdr;
+
+             n_advance_bytes0 =
+               (ip4_header_bytes (ip4_hdr) +
+                sizeof (sctp_payload_data_chunk_t));
+             n_data_bytes0 =
+               clib_net_to_host_u16 (ip4_hdr->length) - n_advance_bytes0;
+
+             tconn = session_lookup_connection_wt4 (fib_index0,
+                                                    &ip4_hdr->dst_address,
+                                                    &ip4_hdr->src_address,
+                                                    sctp_hdr->dst_port,
+                                                    sctp_hdr->src_port,
+                                                    TRANSPORT_PROTO_SCTP,
+                                                    my_thread_index,
+                                                    &is_filtered);
+           }
+         else
+           {
+             ip6_hdr = vlib_buffer_get_current (b0);
+             sctp_hdr = ip6_next_header (ip6_hdr);
+
+             sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+             sctp_chunk_hdr = &full_hdr->common_hdr;
+
+             n_advance_bytes0 = sctp_header_bytes ();
+             n_data_bytes0 =
+               clib_net_to_host_u16 (ip6_hdr->payload_length) -
+               n_advance_bytes0;
+             n_advance_bytes0 += sizeof (ip6_hdr[0]);
+
+             tconn = session_lookup_connection_wt6 (fib_index0,
+                                                    &ip6_hdr->dst_address,
+                                                    &ip6_hdr->src_address,
+                                                    sctp_hdr->dst_port,
+                                                    sctp_hdr->src_port,
+                                                    TRANSPORT_PROTO_SCTP,
+                                                    my_thread_index,
+                                                    &is_filtered);
+           }
+
+         /* Length check */
+         if (PREDICT_FALSE (n_advance_bytes0 < 0))
+           {
+             error0 = SCTP_ERROR_LENGTH;
+             goto done;
+           }
+
+         sctp_conn = sctp_get_connection_from_transport (tconn);
+         vnet_sctp_common_hdr_params_net_to_host (sctp_chunk_hdr);
+
+         u8 type = vnet_sctp_get_chunk_type (sctp_chunk_hdr);
+
+#if SCTP_DEBUG_STATE_MACHINE
+         u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
+#endif
+         vnet_buffer (b0)->sctp.hdr_offset =
+           (u8 *) sctp_hdr - (u8 *) vlib_buffer_get_current (b0);
+
+         /* Session exists */
+         if (PREDICT_TRUE (0 != sctp_conn))
+           {
+             /* Save connection index */
+             vnet_buffer (b0)->sctp.connection_index = tconn->c_index;
+             vnet_buffer (b0)->sctp.data_offset = n_advance_bytes0;
+             vnet_buffer (b0)->sctp.data_len = n_data_bytes0;
+
+             next0 = tm->dispatch_table[sctp_conn->state][type].next;
+             error0 = tm->dispatch_table[sctp_conn->state][type].error;
+
+             SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u: "
+                                     "CURRENT_CONNECTION_STATE = %s,"
+                                     "CHUNK_TYPE_RECEIVED = %s "
+                                     "NEXT_PHASE = %s",
+                                     sctp_conn->sub_conn
+                                     [idx].connection.c_index,
+                                     sctp_state_to_string (sctp_conn->state),
+                                     sctp_chunk_to_string (type),
+                                     phase_to_string (next0));
+
+             if (type == DATA)
+               SCTP_ADV_DBG ("n_advance_bytes0 = %u, n_data_bytes0 = %u",
+                             n_advance_bytes0, n_data_bytes0);
+
+           }
+         else
+           {
+             if (is_filtered)
+               {
+                 next0 = SCTP_INPUT_NEXT_DROP;
+                 error0 = SCTP_ERROR_FILTERED;
+               }
+             else if ((is_ip4 && tm->punt_unknown4) ||
+                      (!is_ip4 && tm->punt_unknown6))
+               {
+                 next0 = SCTP_INPUT_NEXT_PUNT_PHASE;
+                 error0 = SCTP_ERROR_PUNT;
+               }
+             else
+               {
+                 next0 = SCTP_INPUT_NEXT_DROP;
+                 error0 = SCTP_ERROR_NO_LISTENER;
+               }
+             SCTP_DBG_STATE_MACHINE ("sctp_conn == NULL, NEXT_PHASE = %s",
+                                     phase_to_string (next0));
+             sctp_conn = 0;
+           }
+
+       done:
+         b0->error = error0 ? node->errors[error0] : 0;
+
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             sctp_rx_trace_t *t0 =
+               vlib_add_trace (vm, node, b0, sizeof (*t0));
+             sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4);
+           }
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  return from_frame->n_vectors;
+}
+
+static uword
+sctp4_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node,
+                       vlib_frame_t * from_frame)
+{
+  return sctp46_input_dispatcher (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+sctp6_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node,
+                       vlib_frame_t * from_frame)
+{
+  return sctp46_input_dispatcher (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp4_input_node) =
+{
+  .function = sctp4_input_dispatcher,
+  .name = "sctp4-input",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_INPUT_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_INPUT_NEXT_##s] = n,
+    foreach_sctp4_input_next
+#undef _
+  },
+  .format_buffer = format_sctp_header,
+  .format_trace = format_sctp_rx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp4_input_node, sctp4_input_dispatcher);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp6_input_node) =
+{
+  .function = sctp6_input_dispatcher,
+  .name = "sctp6-input",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_INPUT_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [SCTP_INPUT_NEXT_##s] = n,
+    foreach_sctp6_input_next
+#undef _
+  },
+  .format_buffer = format_sctp_header,
+  .format_trace = format_sctp_rx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp6_input_node, sctp6_input_dispatcher);
+
+vlib_node_registration_t sctp4_input_node;
+vlib_node_registration_t sctp6_input_node;
+
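+/* Build the [state][chunk-type] dispatch table used by the input nodes:
+ * every entry defaults to DROP and is then overridden per the RFC4960
+ * state-machine with the next node and error counter to use.
+ */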
+static void
+sctp_dispatch_table_init (sctp_main_t * tm)
+{
+  int i, j;
+  for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++)
+    for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++)
+      {
+       tm->dispatch_table[i][j].next = SCTP_INPUT_NEXT_DROP;
+       tm->dispatch_table[i][j].error = SCTP_ERROR_DISPATCH;
+      }
+
+#define _(t,f,n,e)                                             \
+do {                                                           \
+    tm->dispatch_table[SCTP_STATE_##t][f].next = (n);          \
+    tm->dispatch_table[SCTP_STATE_##t][f].error = (e);         \
+} while (0)
+
+  /*
+   * SCTP STATE-MACHINE states:
+   *
+   * _(CLOSED, "CLOSED")                         \
+   * _(COOKIE_WAIT, "COOKIE_WAIT")               \
+   * _(COOKIE_ECHOED, "COOKIE_ECHOED")           \
+   * _(ESTABLISHED, "ESTABLISHED")               \
+   * _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING")     \
+   * _(SHUTDOWN_SENT, "SHUTDOWN_SENT")           \
+   * _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")   \
+   * _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")
+   */
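+  /*
+   * Each _(t, f, n, e) entry below expands to, e.g. for
+   * _(CLOSED, INIT, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE):
+   *   tm->dispatch_table[SCTP_STATE_CLOSED][INIT].next = SCTP_INPUT_NEXT_LISTEN_PHASE;
+   *   tm->dispatch_table[SCTP_STATE_CLOSED][INIT].error = SCTP_ERROR_NONE;
+   * i.e. an INIT received while CLOSED is handed to the listen phase node.
+   */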
+  //_(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE);     /* UNEXPECTED DATA chunk which requires special handling */
+  _(CLOSED, INIT, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE);
+  _(CLOSED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);       /* UNEXPECTED INIT_ACK chunk */
+  _(CLOSED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION);      /* UNEXPECTED SACK chunk */
+  _(CLOSED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION);    /* UNEXPECTED HEARTBEAT chunk */
+  _(CLOSED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION);    /* UNEXPECTED HEARTBEAT_ACK chunk */
+  _(CLOSED, ABORT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE);
+  _(CLOSED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION);      /* UNEXPECTED SHUTDOWN chunk */
+  _(CLOSED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION);      /* UNEXPECTED SHUTDOWN_ACK chunk */
+  _(CLOSED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION);      /* UNEXPECTED OPERATION_ERROR chunk */
+  _(CLOSED, COOKIE_ECHO, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE);
+  _(CLOSED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);     /* UNEXPECTED COOKIE_ACK chunk */
+  _(CLOSED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION);    /* UNEXPECTED ECNE chunk */
+  _(CLOSED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION);      /* UNEXPECTED CWR chunk */
+  _(CLOSED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);  /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(COOKIE_WAIT, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_NONE);
+  _(COOKIE_WAIT, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE);    /* UNEXPECTED INIT chunk which requires special handling */
+  _(COOKIE_WAIT, INIT_ACK, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE);
+  _(COOKIE_WAIT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */
+  _(COOKIE_WAIT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION);       /* UNEXPECTED HEARTBEAT chunk */
+  _(COOKIE_WAIT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION);       /* UNEXPECTED HEARTBEAT_ACK chunk */
+  _(COOKIE_WAIT, ABORT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE);
+  _(COOKIE_WAIT, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */
+  _(COOKIE_WAIT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */
+  _(COOKIE_WAIT, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */
+  _(COOKIE_WAIT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */
+  _(COOKIE_WAIT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);        /* UNEXPECTED COOKIE_ACK chunk */
+  _(COOKIE_WAIT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION);       /* UNEXPECTED ECNE chunk */
+  _(COOKIE_WAIT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */
+  _(COOKIE_WAIT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);     /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(COOKIE_ECHOED, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_NONE);
+  _(COOKIE_ECHOED, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE);  /* UNEXPECTED INIT chunk which requires special handling */
+  _(COOKIE_ECHOED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);        /* UNEXPECTED INIT_ACK chunk */
+  _(COOKIE_ECHOED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION);       /* UNEXPECTED SACK chunk */
+  _(COOKIE_ECHOED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION);     /* UNEXPECTED HEARTBEAT chunk */
+  _(COOKIE_ECHOED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION);     /* UNEXPECTED HEARTBEAT_ACK chunk */
+  _(COOKIE_ECHOED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION);     /* UNEXPECTED ABORT chunk */
+  _(COOKIE_ECHOED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION);       /* UNEXPECTED SHUTDOWN chunk */
+  _(COOKIE_ECHOED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION);       /* UNEXPECTED SHUTDOWN_ACK chunk */
+  _(COOKIE_ECHOED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION);       /* UNEXPECTED OPERATION_ERROR chunk */
+  _(COOKIE_ECHOED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION);       /* UNEXPECTED COOKIE_ECHO chunk */
+  _(COOKIE_ECHOED, COOKIE_ACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE,
+    SCTP_ERROR_NONE);
+  _(COOKIE_ECHOED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION);     /* UNEXPECTED ECNE chunk */
+  _(COOKIE_ECHOED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION);       /* UNEXPECTED CWR chunk */
+  _(COOKIE_ECHOED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);   /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(ESTABLISHED, DATA, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE);
+  _(ESTABLISHED, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */
+  _(ESTABLISHED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);  /* UNEXPECTED INIT_ACK chunk */
+  _(ESTABLISHED, SACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE);
+  _(ESTABLISHED, HEARTBEAT, SCTP_INPUT_NEXT_ESTABLISHED_PHASE,
+    SCTP_ERROR_NONE);
+  _(ESTABLISHED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE,
+    SCTP_ERROR_NONE);
+  _(ESTABLISHED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION);       /* UNEXPECTED ABORT chunk */
+  _(ESTABLISHED, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE);
+  _(ESTABLISHED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */
+  _(ESTABLISHED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */
+  _(ESTABLISHED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */
+  _(ESTABLISHED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);        /* UNEXPECTED COOKIE_ACK chunk */
+  _(ESTABLISHED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION);       /* UNEXPECTED ECNE chunk */
+  _(ESTABLISHED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */
+  _(ESTABLISHED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);     /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(SHUTDOWN_PENDING, DATA, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE);
+  _(SHUTDOWN_PENDING, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION);    /* UNEXPECTED INIT chunk */
+  _(SHUTDOWN_PENDING, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);     /* UNEXPECTED INIT_ACK chunk */
+  _(SHUTDOWN_PENDING, SACK, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE);
+  _(SHUTDOWN_PENDING, HEARTBEAT, SCTP_INPUT_NEXT_LISTEN_PHASE,
+    SCTP_ERROR_NONE);
+  _(SHUTDOWN_PENDING, HEARTBEAT_ACK, SCTP_INPUT_NEXT_LISTEN_PHASE,
+    SCTP_ERROR_NONE);
+  _(SHUTDOWN_PENDING, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION);  /* UNEXPECTED ABORT chunk */
+  _(SHUTDOWN_PENDING, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE,
+    SCTP_ERROR_NONE);
+  _(SHUTDOWN_PENDING, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION);    /* UNEXPECTED SHUTDOWN_ACK chunk */
+  _(SHUTDOWN_PENDING, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION);    /* UNEXPECTED OPERATION_ERROR chunk */
+  _(SHUTDOWN_PENDING, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION);    /* UNEXPECTED COOKIE_ECHO chunk */
+  _(SHUTDOWN_PENDING, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);   /* UNEXPECTED COOKIE_ACK chunk */
+  _(SHUTDOWN_PENDING, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION);  /* UNEXPECTED ECNE chunk */
+  _(SHUTDOWN_PENDING, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION);    /* UNEXPECTED CWR chunk */
+  _(SHUTDOWN_PENDING, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);        /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(SHUTDOWN_SENT, DATA, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE);
+  _(SHUTDOWN_SENT, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION);       /* UNEXPECTED INIT chunk */
+  _(SHUTDOWN_SENT, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);        /* UNEXPECTED INIT_ACK chunk */
+  _(SHUTDOWN_SENT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION);       /* UNEXPECTED SACK chunk */
+  _(SHUTDOWN_SENT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION);     /* UNEXPECTED HEARTBEAT chunk */
+  _(SHUTDOWN_SENT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION);     /* UNEXPECTED HEARTBEAT_ACK chunk */
+  _(SHUTDOWN_SENT, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION);     /* UNEXPECTED ABORT chunk */
+  _(SHUTDOWN_SENT, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE);
+  _(SHUTDOWN_SENT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_SHUTDOWN_PHASE,
+    SCTP_ERROR_NONE);
+  _(SHUTDOWN_SENT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION);       /* UNEXPECTED COOKIE_ECHO chunk */
+  _(SHUTDOWN_SENT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);      /* UNEXPECTED COOKIE_ACK chunk */
+  _(SHUTDOWN_SENT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION);     /* UNEXPECTED ECNE chunk */
+  _(SHUTDOWN_SENT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION);       /* UNEXPECTED CWR chunk */
+  _(SHUTDOWN_SENT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);   /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(SHUTDOWN_RECEIVED, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_DATA_CHUNK_VIOLATION);   /* UNEXPECTED DATA chunk */
+  _(SHUTDOWN_RECEIVED, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION);   /* UNEXPECTED INIT chunk */
+  _(SHUTDOWN_RECEIVED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);    /* UNEXPECTED INIT_ACK chunk */
+  _(SHUTDOWN_RECEIVED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION);   /* UNEXPECTED SACK chunk */
+  _(SHUTDOWN_RECEIVED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */
+  _(SHUTDOWN_RECEIVED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */
+  _(SHUTDOWN_RECEIVED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */
+  _(SHUTDOWN_RECEIVED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION);   /* UNEXPECTED SHUTDOWN chunk */
+  _(SHUTDOWN_RECEIVED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_SHUTDOWN_PHASE,
+    SCTP_ERROR_NONE);
+  _(SHUTDOWN_RECEIVED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION);   /* UNEXPECTED COOKIE_ECHO chunk */
+  _(SHUTDOWN_RECEIVED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);  /* UNEXPECTED COOKIE_ACK chunk */
+  _(SHUTDOWN_RECEIVED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */
+  _(SHUTDOWN_RECEIVED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION);   /* UNEXPECTED CWR chunk */
+  _(SHUTDOWN_RECEIVED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION);       /* UNEXPECTED SHUTDOWN_COMPLETE chunk */
+
+  _(SHUTDOWN_ACK_SENT, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_DATA_CHUNK_VIOLATION);   /* UNEXPECTED DATA chunk */
+  _(SHUTDOWN_ACK_SENT, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION);   /* UNEXPECTED INIT chunk */
+  _(SHUTDOWN_ACK_SENT, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);    /* UNEXPECTED INIT_ACK chunk */
+  _(SHUTDOWN_ACK_SENT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION);   /* UNEXPECTED SACK chunk */
+  _(SHUTDOWN_ACK_SENT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */
+  _(SHUTDOWN_ACK_SENT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */
+  _(SHUTDOWN_ACK_SENT, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */
+  _(SHUTDOWN_ACK_SENT, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION);   /* UNEXPECTED SHUTDOWN chunk */
+  _(SHUTDOWN_ACK_SENT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION);   /* UNEXPECTED SHUTDOWN_ACK chunk */
+  _(SHUTDOWN_ACK_SENT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION);   /* UNEXPECTED COOKIE_ECHO chunk */
+  _(SHUTDOWN_ACK_SENT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);  /* UNEXPECTED COOKIE_ACK chunk */
+  _(SHUTDOWN_ACK_SENT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */
+  _(SHUTDOWN_ACK_SENT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION);   /* UNEXPECTED CWR chunk */
+  _(SHUTDOWN_ACK_SENT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_SHUTDOWN_PHASE,
+    SCTP_ERROR_NONE);
+
+  /* TODO: Handle COOKIE ECHO when a TCB Exists */
+
+#undef _
+}
+
+clib_error_t *
+sctp_input_init (vlib_main_t * vm)
+{
+  clib_error_t *error = 0;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+
+  if ((error = vlib_call_init_function (vm, sctp_init)))
+    return error;
+
+  /* Initialize dispatch table. */
+  sctp_dispatch_table_init (tm);
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (sctp_input_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c
new file mode 100644 (file)
index 0000000..841444e
--- /dev/null
@@ -0,0 +1,1331 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/sctp/sctp.h>
+#include <vnet/sctp/sctp_debug.h>
+#include <vppinfra/random.h>
+
+vlib_node_registration_t sctp4_output_node;
+vlib_node_registration_t sctp6_output_node;
+
+typedef enum _sctp_output_next
+{
+  SCTP_OUTPUT_NEXT_DROP,
+  SCTP_OUTPUT_NEXT_IP_LOOKUP,
+  SCTP_OUTPUT_N_NEXT
+} sctp_output_next_t;
+
+#define foreach_sctp4_output_next                      \
+  _ (DROP, "error-drop")                        \
+  _ (IP_LOOKUP, "ip4-lookup")
+
+#define foreach_sctp6_output_next                      \
+  _ (DROP, "error-drop")                        \
+  _ (IP_LOOKUP, "ip6-lookup")
+
+static char *sctp_error_strings[] = {
+#define sctp_error(n,s) s,
+#include <vnet/sctp/sctp_error.def>
+#undef sctp_error
+};
+
+typedef struct
+{
+  sctp_header_t sctp_header;
+  sctp_connection_t sctp_connection;
+} sctp_tx_trace_t;
+
+/**
+ * Flush tx frame populated by retransmits and timer pops
+ */
+void
+sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
+{
+  if (sctp_main.tx_frames[!is_ip4][thread_index])
+    {
+      u32 next_index;
+      next_index = is_ip4 ? sctp4_output_node.index : sctp6_output_node.index;
+      vlib_put_frame_to_node (vm, next_index,
+                             sctp_main.tx_frames[!is_ip4][thread_index]);
+      sctp_main.tx_frames[!is_ip4][thread_index] = 0;
+    }
+}
+
+/**
+ * Flush ip lookup tx frames populated by timer pops
+ */
+always_inline void
+sctp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4)
+{
+  if (sctp_main.ip_lookup_tx_frames[!is_ip4][thread_index])
+    {
+      u32 next_index;
+      next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
+      vlib_put_frame_to_node (vm, next_index,
+                             sctp_main.ip_lookup_tx_frames[!is_ip4]
+                             [thread_index]);
+      sctp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+    }
+}
+
+/**
+ * Flush v4 and v6 sctp and ip-lookup tx frames for thread index
+ */
+void
+sctp_flush_frames_to_output (u8 thread_index)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  sctp_flush_frame_to_output (vm, thread_index, 1);
+  sctp_flush_frame_to_output (vm, thread_index, 0);
+  sctp_flush_frame_to_ip_lookup (vm, thread_index, 1);
+  sctp_flush_frame_to_ip_lookup (vm, thread_index, 0);
+}
+
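+/**
+ * Compute a pseudo-header Internet checksum over the SCTP payload,
+ * walking chained buffers when present. Note that RFC 4960 (Appendix B)
+ * defines the SCTP checksum field as a CRC32c over the packet.
+ */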
+u32
+ip4_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+                          ip4_header_t * ip0)
+{
+  ip_csum_t checksum;
+  u32 ip_header_length, payload_length_host_byte_order;
+  u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer;
+  void *data_this_buffer;
+
+  /* Initialize checksum with ip header. */
+  ip_header_length = ip4_header_bytes (ip0);
+  payload_length_host_byte_order =
+    clib_net_to_host_u16 (ip0->length) - ip_header_length;
+  checksum =
+    clib_host_to_net_u32 (payload_length_host_byte_order +
+                         (ip0->protocol << 16));
+
+  if (BITS (uword) == 32)
+    {
+      checksum =
+       ip_csum_with_carry (checksum,
+                           clib_mem_unaligned (&ip0->src_address, u32));
+      checksum =
+       ip_csum_with_carry (checksum,
+                           clib_mem_unaligned (&ip0->dst_address, u32));
+    }
+  else
+    checksum =
+      ip_csum_with_carry (checksum,
+                         clib_mem_unaligned (&ip0->src_address, u64));
+
+  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+  data_this_buffer = (void *) ip0 + ip_header_length;
+  n_ip_bytes_this_buffer =
+    p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data);
+  if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer)
+    {
+      n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ?
+       n_ip_bytes_this_buffer - ip_header_length : 0;
+    }
+  while (1)
+    {
+      checksum =
+       ip_incremental_checksum (checksum, data_this_buffer, n_this_buffer);
+      n_bytes_left -= n_this_buffer;
+      if (n_bytes_left == 0)
+       break;
+
+      ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT);
+      p0 = vlib_get_buffer (vm, p0->next_buffer);
+      data_this_buffer = vlib_buffer_get_current (p0);
+      n_this_buffer = p0->current_length;
+    }
+
+  return checksum;
+}
+
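+/**
+ * IPv6 counterpart of ip4_sctp_compute_checksum: skips an optional
+ * hop-by-hop extension header and reports bogus lengths via bogus_lengthp.
+ */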
+u32
+ip6_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0,
+                          ip6_header_t * ip0, int *bogus_lengthp)
+{
+  ip_csum_t checksum;
+  u16 payload_length_host_byte_order;
+  u32 i, n_this_buffer, n_bytes_left;
+  u32 headers_size = sizeof (ip0[0]);
+  void *data_this_buffer;
+
+  ASSERT (bogus_lengthp);
+  *bogus_lengthp = 0;
+
+  /* Initialize checksum with ip header. */
+  checksum = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol);
+  payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length);
+  data_this_buffer = (void *) (ip0 + 1);
+
+  for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++)
+    {
+      checksum = ip_csum_with_carry (checksum,
+                                    clib_mem_unaligned (&ip0->
+                                                        src_address.as_uword
+                                                        [i], uword));
+      checksum =
+       ip_csum_with_carry (checksum,
+                           clib_mem_unaligned (&ip0->dst_address.as_uword[i],
+                                               uword));
+    }
+
+  /* Some packets may carry a "router alert" hop-by-hop extension header
+   * before the transport header; skip past it so the checksum covers only the SCTP payload */
+  if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS))
+    {
+      u32 skip_bytes;
+      ip6_hop_by_hop_ext_t *ext_hdr =
+       (ip6_hop_by_hop_ext_t *) data_this_buffer;
+
+      /* validate that the next header really is SCTP */
+      ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_SCTP));
+
+      skip_bytes = 8 * (1 + ext_hdr->n_data_u64s);
+      data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes);
+
+      payload_length_host_byte_order -= skip_bytes;
+      headers_size += skip_bytes;
+    }
+
+  n_bytes_left = n_this_buffer = payload_length_host_byte_order;
+  if (p0 && n_this_buffer + headers_size > p0->current_length)
+    n_this_buffer =
+      p0->current_length >
+      headers_size ? p0->current_length - headers_size : 0;
+  while (1)
+    {
+      checksum =
+       ip_incremental_checksum (checksum, data_this_buffer, n_this_buffer);
+      n_bytes_left -= n_this_buffer;
+      if (n_bytes_left == 0)
+       break;
+
+      if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT))
+       {
+         *bogus_lengthp = 1;
+         return 0xfefe;
+       }
+      p0 = vlib_get_buffer (vm, p0->next_buffer);
+      data_this_buffer = vlib_buffer_get_current (p0);
+      n_this_buffer = p0->current_length;
+    }
+
+  return checksum;
+}
+
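+/**
+ * Prepend the IPv4/IPv6 header for the given sub-connection and fill in
+ * the checksum field of the already-built SCTP header.
+ */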
+void
+sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * tc,
+                 vlib_buffer_t * b)
+{
+  sctp_header_t *th = vlib_buffer_get_current (b);
+  vlib_main_t *vm = vlib_get_main ();
+  if (tc->c_is_ip4)
+    {
+      ip4_header_t *ih;
+      ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4,
+                                &tc->c_rmt_ip4, IP_PROTOCOL_SCTP, 1);
+      th->checksum = ip4_sctp_compute_checksum (vm, b, ih);
+    }
+  else
+    {
+      ip6_header_t *ih;
+      int bogus = ~0;
+
+      ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6,
+                                &tc->c_rmt_ip6, IP_PROTOCOL_SCTP);
+      th->checksum = ip6_sctp_compute_checksum (vm, b, ih, &bogus);
+      ASSERT (!bogus);
+    }
+}
+
+always_inline void *
+sctp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  if (b->flags & VLIB_BUFFER_NEXT_PRESENT)
+    vlib_buffer_free_one (vm, b->next_buffer);
+  /* Zero all flags but free list index and trace flag */
+  b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1;
+  b->current_data = 0;
+  b->current_length = 0;
+  b->total_length_not_including_first_buffer = 0;
+  vnet_buffer (b)->sctp.flags = 0;
+
+  /* Leave enough space for headers */
+  return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+}
+
+always_inline void *
+sctp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b)
+{
+  ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0);
+  b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK;
+  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+  b->total_length_not_including_first_buffer = 0;
+  vnet_buffer (b)->sctp.flags = 0;
+  VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b);
+  /* Leave enough space for headers */
+  return vlib_buffer_make_headroom (b, MAX_HDRS_LEN);
+}
+
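+/**
+ * Grow the per-thread cache of preallocated tx buffer indices by
+ * n_free_buffers; returns -1 if no buffers could be allocated.
+ */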
+always_inline int
+sctp_alloc_tx_buffers (sctp_main_t * tm, u8 thread_index, u32 n_free_buffers)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u32 current_length = vec_len (tm->tx_buffers[thread_index]);
+  u32 n_allocated;
+
+  vec_validate (tm->tx_buffers[thread_index],
+               current_length + n_free_buffers - 1);
+  n_allocated =
+    vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][current_length],
+                      n_free_buffers);
+  _vec_len (tm->tx_buffers[thread_index]) = current_length + n_allocated;
+  /* buffer shortage, report failure */
+  if (vec_len (tm->tx_buffers[thread_index]) == 0)
+    {
+      clib_warning ("out of buffers");
+      return -1;
+    }
+  return 0;
+}
+
+always_inline int
+sctp_get_free_buffer_index (sctp_main_t * tm, u32 * bidx)
+{
+  u32 *my_tx_buffers;
+  u32 thread_index = vlib_get_thread_index ();
+  if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0))
+    {
+      if (sctp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE))
+       return -1;
+    }
+  my_tx_buffers = tm->tx_buffers[thread_index];
+  *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1];
+  _vec_len (my_tx_buffers) -= 1;
+  return 0;
+}
+
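+/**
+ * Append the buffer to the per-thread frame headed to the sctp4/6-output
+ * node; the frame is handed to the node once it is full or when an
+ * immediate flush is requested.
+ */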
+always_inline void
+sctp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+                         u8 is_ip4, u8 flush)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  u32 thread_index = vlib_get_thread_index ();
+  u32 *to_next, next_index;
+  vlib_frame_t *f;
+
+  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+  b->error = 0;
+
+  /* Decide where to send the packet */
+  next_index = is_ip4 ? sctp4_output_node.index : sctp6_output_node.index;
+  sctp_trajectory_add_start (b, 2);
+
+  /* Get frame to v4/6 output node */
+  f = tm->tx_frames[!is_ip4][thread_index];
+  if (!f)
+    {
+      f = vlib_get_frame_to_node (vm, next_index);
+      ASSERT (f);
+      tm->tx_frames[!is_ip4][thread_index] = f;
+    }
+  to_next = vlib_frame_vector_args (f);
+  to_next[f->n_vectors] = bi;
+  f->n_vectors += 1;
+  if (flush || f->n_vectors == VLIB_FRAME_SIZE)
+    {
+      vlib_put_frame_to_node (vm, next_index, f);
+      tm->tx_frames[!is_ip4][thread_index] = 0;
+    }
+}
+
+always_inline void
+sctp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+                           u8 is_ip4)
+{
+  sctp_enqueue_to_output_i (vm, b, bi, is_ip4, 1);
+}
+
+always_inline void
+sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+                            u8 is_ip4, u8 flush)
+{
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  u32 thread_index = vlib_get_thread_index ();
+  u32 *to_next, next_index;
+  vlib_frame_t *f;
+
+  b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+  b->error = 0;
+
+  /* Default FIB for now */
+  vnet_buffer (b)->sw_if_index[VLIB_TX] = 0;
+
+  /* Send to IP lookup */
+  next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index;
+  if (VLIB_BUFFER_TRACE_TRAJECTORY > 0)
+    {
+      b->pre_data[0] = 2;
+      b->pre_data[1] = next_index;
+    }
+
+  f = tm->ip_lookup_tx_frames[!is_ip4][thread_index];
+  if (!f)
+    {
+      f = vlib_get_frame_to_node (vm, next_index);
+      ASSERT (f);
+      tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f;
+    }
+
+  to_next = vlib_frame_vector_args (f);
+  to_next[f->n_vectors] = bi;
+  f->n_vectors += 1;
+  if (flush || f->n_vectors == VLIB_FRAME_SIZE)
+    {
+      vlib_put_frame_to_node (vm, next_index, f);
+      tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0;
+    }
+}
+
+always_inline void
+sctp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+                          u8 is_ip4)
+{
+  sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0);
+}
+
+always_inline void
+sctp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi,
+                              u8 is_ip4)
+{
+  sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1);
+}
+
+/**
+ * Convert buffer to INIT
+ */
+void
+sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b)
+{
+  u32 random_seed = random_default_seed ();
+  u16 alloc_bytes = sizeof (sctp_init_chunk_t);
+  sctp_sub_connection_t *sub_conn =
+    &sctp_conn->sub_conn[sctp_pick_conn_idx_on_chunk (INIT)];
+
+  sctp_ipv4_addr_param_t *ip4_param = 0;
+  sctp_ipv6_addr_param_t *ip6_param = 0;
+
+  if (sub_conn->c_is_ip4)
+    alloc_bytes += sizeof (sctp_ipv4_addr_param_t);
+  else
+    alloc_bytes += sizeof (sctp_ipv6_addr_param_t);
+
+  /* As per RFC 4960, the chunk_length value does NOT include the size of
+   * the first header (see sctp_header_t) or any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
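+  /* Chunks are padded to a 4-byte boundary on the wire, but the Chunk
+   * Length field excludes the padding: e.g. an 18-byte chunk is sent as
+   * 20 bytes while its length field still reports 18 (RFC 4960, Section 3.2). */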
+
+  sctp_init_chunk_t *init_chunk = vlib_buffer_push_uninit (b, alloc_bytes);
+
+  u16 pointer_offset = sizeof (sctp_init_chunk_t);
+  if (sub_conn->c_is_ip4)
+    {
+      ip4_param = (sctp_ipv4_addr_param_t *) ((u8 *) init_chunk + pointer_offset);
+      ip4_param->address.as_u32 = sub_conn->c_lcl_ip.ip4.as_u32;
+
+      pointer_offset += sizeof (sctp_ipv4_addr_param_t);
+    }
+  else
+    {
+      ip6_param = (sctp_ipv6_addr_param_t *) ((u8 *) init_chunk + pointer_offset);
+      ip6_param->address.as_u64[0] = sub_conn->c_lcl_ip.ip6.as_u64[0];
+      ip6_param->address.as_u64[1] = sub_conn->c_lcl_ip.ip6.as_u64[1];
+
+      pointer_offset += sizeof (sctp_ipv6_addr_param_t);
+    }
+
+  init_chunk->sctp_hdr.src_port = sub_conn->c_lcl_port;        /* No need of host_to_net conversion, already in net-byte order */
+  init_chunk->sctp_hdr.dst_port = sub_conn->c_rmt_port;        /* No need of host_to_net conversion, already in net-byte order */
+  init_chunk->sctp_hdr.checksum = 0;
+  /* The sender of an INIT must set the VERIFICATION_TAG to 0 as per RFC 4960 Section 8.5.1 */
+  init_chunk->sctp_hdr.verification_tag = 0x0;
+
+  vnet_sctp_set_chunk_type (&init_chunk->chunk_hdr, INIT);
+  vnet_sctp_set_chunk_length (&init_chunk->chunk_hdr, chunk_len);
+  vnet_sctp_common_hdr_params_host_to_net (&init_chunk->chunk_hdr);
+
+  init_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND);
+  init_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed));
+  init_chunk->inboud_streams_count =
+    clib_host_to_net_u16 (INBOUND_STREAMS_COUNT);
+  init_chunk->outbound_streams_count =
+    clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT);
+
+  sctp_conn->local_tag = init_chunk->initiate_tag;
+
+  vnet_buffer (b)->sctp.connection_index = sub_conn->c_c_index;
+
+  SCTP_DBG_STATE_MACHINE ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), "
+                         "CHUNK_TYPE = %s, "
+                         "SRC_PORT = %u, DST_PORT = %u",
+                         sub_conn->connection.c_index,
+                         sctp_conn->state,
+                         sctp_state_to_string (sctp_conn->state),
+                         sctp_chunk_to_string (INIT),
+                         init_chunk->sctp_hdr.src_port,
+                         init_chunk->sctp_hdr.dst_port);
+}
+
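+/**
+ * Placeholder for the State Cookie MAC computation: it currently returns
+ * a constant 0, so cookies are not yet cryptographically protected.
+ */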
+u64
+sctp_compute_mac ()
+{
+  return 0x0;
+}
+
+void
+sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK);
+
+  sctp_reuse_buffer (vm, b);
+
+  u16 alloc_bytes = sizeof (sctp_cookie_ack_chunk_t);
+
+  /* As per RFC 4960, the chunk_length value does NOT include the size of
+   * the first header (see sctp_header_t) or any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_cookie_ack_chunk_t *cookie_ack_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  cookie_ack_chunk->sctp_hdr.checksum = 0;
+  cookie_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port;
+  cookie_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port;
+  cookie_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag;
+  vnet_sctp_set_chunk_type (&cookie_ack_chunk->chunk_hdr, COOKIE_ACK);
+  vnet_sctp_set_chunk_length (&cookie_ack_chunk->chunk_hdr, chunk_len);
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+void
+sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, vlib_buffer_t * b,
+                               sctp_state_cookie_param_t * sc)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ECHO);
+
+  sctp_reuse_buffer (vm, b);
+
+  /* The minimum size of the message is given by the sctp_cookie_echo_chunk_t */
+  u16 alloc_bytes = sizeof (sctp_cookie_echo_chunk_t);
+  /* As per RFC 4960, the chunk_length value does NOT include the size of
+   * the first header (see sctp_header_t) or any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+  sctp_cookie_echo_chunk_t *cookie_echo_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+  cookie_echo_chunk->sctp_hdr.checksum = 0;
+  cookie_echo_chunk->sctp_hdr.src_port =
+    tc->sub_conn[idx].connection.lcl_port;
+  cookie_echo_chunk->sctp_hdr.dst_port =
+    tc->sub_conn[idx].connection.rmt_port;
+  cookie_echo_chunk->sctp_hdr.verification_tag = tc->remote_tag;
+  vnet_sctp_set_chunk_type (&cookie_echo_chunk->chunk_hdr, COOKIE_ECHO);
+  vnet_sctp_set_chunk_length (&cookie_echo_chunk->chunk_hdr, chunk_len);
+  clib_memcpy (&(cookie_echo_chunk->cookie), sc,
+              sizeof (sctp_state_cookie_param_t));
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+/**
+ * Convert buffer to INIT-ACK
+ */
+void
+sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b,
+                           ip4_address_t * ip4_addr,
+                           ip6_address_t * ip6_addr)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  sctp_ipv4_addr_param_t *ip4_param = 0;
+  sctp_ipv6_addr_param_t *ip6_param = 0;
+  u8 idx = sctp_pick_conn_idx_on_chunk (INIT_ACK);
+  u32 random_seed = random_default_seed ();
+
+  sctp_reuse_buffer (vm, b);
+
+  /* The minimum size of the message is given by the sctp_init_ack_chunk_t */
+  u16 alloc_bytes =
+    sizeof (sctp_init_ack_chunk_t) + sizeof (sctp_state_cookie_param_t);
+
+  if (PREDICT_TRUE (ip4_addr != NULL))
+    {
+      /* Create room for variable-length fields in the INIT_ACK chunk */
+      alloc_bytes += SCTP_IPV4_ADDRESS_TYPE_LENGTH;
+    }
+  if (PREDICT_TRUE (ip6_addr != NULL))
+    {
+      /* Create room for variable-length fields in the INIT_ACK chunk */
+      alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH;
+    }
+
+  if (tc->sub_conn[idx].connection.is_ip4)
+    alloc_bytes += sizeof (sctp_ipv4_addr_param_t);
+  else
+    alloc_bytes += sizeof (sctp_ipv6_addr_param_t);
+
+  /* As per RFC 4960, the chunk_length value does NOT include the size of
+   * the first header (see sctp_header_t) or any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_init_ack_chunk_t *init_ack_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  u16 pointer_offset = sizeof (sctp_init_ack_chunk_t);
+
+  /* Create State Cookie parameter */
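+  /* The cookie carries its creation time, lifespan and a MAC so the peer
+   * can echo it back in a COOKIE_ECHO and the association can be validated
+   * without keeping state in the meantime (RFC 4960, Section 5.1.3) */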
+  sctp_state_cookie_param_t *state_cookie_param =
+    (sctp_state_cookie_param_t *) ((char *) init_ack_chunk + pointer_offset);
+
+  state_cookie_param->param_hdr.type =
+    clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE);
+  state_cookie_param->param_hdr.length =
+    clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t));
+  state_cookie_param->creation_time = clib_host_to_net_u32 (sctp_time_now ());
+  state_cookie_param->cookie_lifespan =
+    clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE);
+  state_cookie_param->mac = clib_host_to_net_u64 (sctp_compute_mac ());
+
+  pointer_offset += sizeof (sctp_state_cookie_param_t);
+
+  if (PREDICT_TRUE (ip4_addr != NULL))
+    {
+      sctp_ipv4_addr_param_t *ipv4_addr =
+       (sctp_ipv4_addr_param_t *) ((u8 *) init_ack_chunk + pointer_offset);
+
+      ipv4_addr->param_hdr.type =
+       clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE);
+      ipv4_addr->param_hdr.length =
+       clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH);
+      ipv4_addr->address.as_u32 = ip4_addr->as_u32;
+
+      pointer_offset += SCTP_IPV4_ADDRESS_TYPE_LENGTH;
+    }
+  if (PREDICT_TRUE (ip6_addr != NULL))
+    {
+      sctp_ipv6_addr_param_t *ipv6_addr =
+       (sctp_ipv6_addr_param_t *) ((u8 *) init_ack_chunk + pointer_offset);
+
+      ipv6_addr->param_hdr.type =
+       clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE);
+      ipv6_addr->param_hdr.length =
+       clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH);
+      ipv6_addr->address.as_u64[0] = ip6_addr->as_u64[0];
+      ipv6_addr->address.as_u64[1] = ip6_addr->as_u64[1];
+
+      pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH;
+    }
+
+  if (tc->sub_conn[idx].connection.is_ip4)
+    {
+      ip4_param = (sctp_ipv4_addr_param_t *) ((u8 *) init_ack_chunk + pointer_offset);
+      ip4_param->address.as_u32 =
+       tc->sub_conn[idx].connection.lcl_ip.ip4.as_u32;
+
+      pointer_offset += sizeof (sctp_ipv4_addr_param_t);
+    }
+  else
+    {
+      ip6_param = (sctp_ipv6_addr_param_t *) ((u8 *) init_ack_chunk + pointer_offset);
+      ip6_param->address.as_u64[0] =
+       tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0];
+      ip6_param->address.as_u64[1] =
+       tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1];
+
+      pointer_offset += sizeof (sctp_ipv6_addr_param_t);
+    }
+
+  /* src_port & dst_port are already in network byte-order */
+  init_ack_chunk->sctp_hdr.checksum = 0;
+  init_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port;
+  init_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port;
+  /* tc->remote_tag is already in network byte-order (it is a copy of the initiate_tag received with the INIT chunk) */
+  init_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag;
+
+  vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK);
+  vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len);
+
+  init_ack_chunk->initiate_tag =
+    clib_host_to_net_u32 (random_u32 (&random_seed));
+  /* As per RFC 4960, the initial_tsn may be the same value as the initiate_tag */
+  init_ack_chunk->initial_tsn = init_ack_chunk->initiate_tag;
+  init_ack_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND);
+  init_ack_chunk->inboud_streams_count =
+    clib_host_to_net_u16 (INBOUND_STREAMS_COUNT);
+  init_ack_chunk->outbound_streams_count =
+    clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT);
+
+  tc->local_tag = init_ack_chunk->initiate_tag;
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+/**
+ * Convert buffer to SHUTDOWN
+ */
+void
+sctp_prepare_shutdown_chunk (sctp_connection_t * tc, vlib_buffer_t * b)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN);
+  u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t);
+
+  sctp_reuse_buffer (vm, b);
+
+  /* As per RFC 4960, the chunk_length value does NOT include the size of
+   * the first header (see sctp_header_t) or any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_shutdown_association_chunk_t *shutdown_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  shutdown_chunk->sctp_hdr.checksum = 0;
+  /* No need of host_to_net conversion, already in net-byte order */
+  shutdown_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port;
+  shutdown_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port;
+  shutdown_chunk->sctp_hdr.verification_tag = tc->remote_tag;
+  vnet_sctp_set_chunk_type (&shutdown_chunk->chunk_hdr, SHUTDOWN);
+  vnet_sctp_set_chunk_length (&shutdown_chunk->chunk_hdr, chunk_len);
+
+  shutdown_chunk->cumulative_tsn_ack = tc->rcv_las;
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+/*
+ * Send SHUTDOWN
+ */
+void
+sctp_send_shutdown (sctp_connection_t * tc)
+{
+  vlib_buffer_t *b;
+  u32 bi;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_main_t *vm = vlib_get_main ();
+
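+  /* As per RFC 4960 Section 9.2, SHUTDOWN is sent only once all
+   * outstanding DATA chunks have been acknowledged */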
+  if (sctp_check_outstanding_data_chunks (tc) > 0)
+    return;
+
+  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+    return;
+
+  b = vlib_get_buffer (vm, bi);
+  sctp_init_buffer (vm, b);
+  sctp_prepare_shutdown_chunk (tc, b);
+
+  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN);
+  sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b);
+  sctp_enqueue_to_output_now (vm, b, bi, tc->sub_conn[idx].connection.is_ip4);
+}
+
+/**
+ * Convert buffer to SHUTDOWN_ACK
+ */
+void
+sctp_prepare_shutdown_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b)
+{
+  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK);
+  u16 alloc_bytes = sizeof (sctp_shutdown_ack_chunk_t);
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  sctp_shutdown_ack_chunk_t *shutdown_ack_chunk =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  shutdown_ack_chunk->sctp_hdr.checksum = 0;
+  /* No need of host_to_net conversion, already in net-byte order */
+  shutdown_ack_chunk->sctp_hdr.src_port =
+    tc->sub_conn[idx].connection.lcl_port;
+  shutdown_ack_chunk->sctp_hdr.dst_port =
+    tc->sub_conn[idx].connection.rmt_port;
+  shutdown_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag;
+
+  vnet_sctp_set_chunk_type (&shutdown_ack_chunk->chunk_hdr, SHUTDOWN_ACK);
+  vnet_sctp_set_chunk_length (&shutdown_ack_chunk->chunk_hdr, chunk_len);
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+/*
+ * Send SHUTDOWN_ACK
+ */
+void
+sctp_send_shutdown_ack (sctp_connection_t * tc)
+{
+  vlib_buffer_t *b;
+  u32 bi;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_main_t *vm = vlib_get_main ();
+
+  if (sctp_check_outstanding_data_chunks (tc) > 0)
+    return;
+
+  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+    return;
+
+  b = vlib_get_buffer (vm, bi);
+  sctp_init_buffer (vm, b);
+  sctp_prepare_shutdown_ack_chunk (tc, b);
+
+  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK);
+  sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b);
+  sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4);
+
+  /* Start the SCTP_TIMER_T2_SHUTDOWN timer */
+  sctp_timer_set (tc, idx, SCTP_TIMER_T2_SHUTDOWN, SCTP_RTO_INIT);
+  tc->state = SCTP_STATE_SHUTDOWN_ACK_SENT;
+}
+
+/**
+ * Convert buffer to SACK
+ */
+void
+sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  u8 idx = sctp_pick_conn_idx_on_chunk (SACK);
+
+  sctp_reuse_buffer (vm, b);
+
+  u16 alloc_bytes = sizeof (sctp_selective_ack_chunk_t);
+
+  /* As per RFC 4960, the chunk_length value does NOT include the size of
+   * the first header (see sctp_header_t) or any padding
+   */
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  sctp_selective_ack_chunk_t *sack = vlib_buffer_push_uninit (b, alloc_bytes);
+
+  sack->sctp_hdr.checksum = 0;
+  sack->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port;
+  sack->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port;
+  sack->sctp_hdr.verification_tag = tc->remote_tag;
+  vnet_sctp_set_chunk_type (&sack->chunk_hdr, SACK);
+  vnet_sctp_set_chunk_length (&sack->chunk_hdr, chunk_len);
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+/**
+ * Convert buffer to SHUTDOWN_COMPLETE
+ */
+void
+sctp_prepare_shutdown_complete_chunk (sctp_connection_t * tc,
+                                     vlib_buffer_t * b)
+{
+  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE);
+  u16 alloc_bytes = sizeof (sctp_shutdown_complete_chunk_t);
+  alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes);
+
+  u16 chunk_len = alloc_bytes - sizeof (sctp_header_t);
+
+  sctp_shutdown_complete_chunk_t *shutdown_complete =
+    vlib_buffer_push_uninit (b, alloc_bytes);
+
+  shutdown_complete->sctp_hdr.checksum = 0;
+  /* No need of host_to_net conversion, already in net-byte order */
+  shutdown_complete->sctp_hdr.src_port =
+    tc->sub_conn[idx].connection.lcl_port;
+  shutdown_complete->sctp_hdr.dst_port =
+    tc->sub_conn[idx].connection.rmt_port;
+  shutdown_complete->sctp_hdr.verification_tag = tc->remote_tag;
+
+  vnet_sctp_set_chunk_type (&shutdown_complete->chunk_hdr, SHUTDOWN_COMPLETE);
+  vnet_sctp_set_chunk_length (&shutdown_complete->chunk_hdr, chunk_len);
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+void
+sctp_send_shutdown_complete (sctp_connection_t * tc)
+{
+  vlib_buffer_t *b;
+  u32 bi;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_main_t *vm = vlib_get_main ();
+
+  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+    return;
+
+  b = vlib_get_buffer (vm, bi);
+  sctp_init_buffer (vm, b);
+  sctp_prepare_shutdown_complete_chunk (tc, b);
+
+  u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE);
+  sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b);
+  sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4);
+
+  tc->state = SCTP_STATE_CLOSED;
+}
+
+
+/*
+ *  Send INIT
+ */
+void
+sctp_send_init (sctp_connection_t * tc)
+{
+  vlib_buffer_t *b;
+  u32 bi;
+  sctp_main_t *tm = vnet_get_sctp_main ();
+  vlib_main_t *vm = vlib_get_main ();
+
+  if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi)))
+    return;
+
+  b = vlib_get_buffer (vm, bi);
+  u8 idx = sctp_pick_conn_idx_on_chunk (INIT);
+
+  sctp_init_buffer (vm, b);
+  sctp_prepare_init_chunk (tc, b);
+
+  /* Measure RTT with this */
+  tc->rtt_ts = sctp_time_now ();
+  tc->rtt_seq = tc->snd_nxt;
+  tc->rto_boff = 0;
+
+  sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b);
+  sctp_enqueue_to_ip_lookup_now (vm, b, bi, tc->sub_conn[idx].c_is_ip4);
+
+  /* Start the T1_INIT timer */
+  sctp_timer_set (tc, idx, SCTP_TIMER_T1_INIT, SCTP_RTO_INIT);
+  /* Change state to COOKIE_WAIT */
+  tc->state = SCTP_STATE_COOKIE_WAIT;
+}
+
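+/**
+ * Congestion-recovery detection is not implemented yet; always report
+ * "not in recovery" so that RTT samples keep being taken.
+ */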
+always_inline u8
+sctp_in_cong_recovery (sctp_connection_t * sctp_conn)
+{
+  return 0;
+}
+
+/**
+ * Push SCTP header and update connection variables
+ */
+static void
+sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b,
+                sctp_state_t next_state)
+{
+  u8 idx = sctp_pick_conn_idx_on_chunk (DATA);
+
+  u16 data_len =
+    b->current_length + b->total_length_not_including_first_buffer;
+  ASSERT (!b->total_length_not_including_first_buffer
+         || (b->flags & VLIB_BUFFER_NEXT_PRESENT));
+
+  SCTP_ADV_DBG_OUTPUT ("b->current_length = %u, "
+                      "b->current_data = %p "
+                      "data_len = %u",
+                      b->current_length, b->current_data, data_len);
+
+  u16 bytes_to_add = sizeof (sctp_payload_data_chunk_t);
+  u16 chunk_length = data_len + bytes_to_add - sizeof (sctp_header_t);
+
+  bytes_to_add += vnet_sctp_calculate_padding (bytes_to_add + data_len);
+
+  sctp_payload_data_chunk_t *data_chunk =
+    vlib_buffer_push_uninit (b, bytes_to_add);
+
+  data_chunk->sctp_hdr.checksum = 0;
+  data_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port;
+  data_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port;
+  data_chunk->sctp_hdr.verification_tag = tc->remote_tag;
+
+  data_chunk->tsn = clib_host_to_net_u32 (0);
+  data_chunk->stream_id = clib_host_to_net_u16 (0);
+  data_chunk->stream_seq = clib_host_to_net_u16 (0);
+
+  vnet_sctp_set_chunk_type (&data_chunk->chunk_hdr, DATA);
+  vnet_sctp_set_chunk_length (&data_chunk->chunk_hdr, chunk_length);
+
+  SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u",
+                      b->data, b->current_data);
+
+  vnet_buffer (b)->sctp.connection_index =
+    tc->sub_conn[idx].connection.c_index;
+}
+
+u32
+sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b)
+{
+  sctp_connection_t *tc = sctp_get_connection_from_transport (tconn);
+  sctp_push_hdr_i (tc, b, SCTP_STATE_ESTABLISHED);
+
+  if (tc->rtt_ts == 0 && !sctp_in_cong_recovery (tc))
+    {
+      tc->rtt_ts = sctp_time_now ();
+      tc->rtt_seq = tc->snd_nxt;
+    }
+  sctp_trajectory_add_start (b, 3);
+
+  return 0;
+}
+
+always_inline uword
+sctp46_output_inline (vlib_main_t * vm,
+                     vlib_node_runtime_t * node,
+                     vlib_frame_t * from_frame, int is_ip4)
+{
+  u32 n_left_from, next_index, *from, *to_next;
+  u32 my_thread_index = vm->thread_index;
+
+  from = vlib_frame_vector_args (from_frame);
+  n_left_from = from_frame->n_vectors;
+  next_index = node->cached_next_index;
+  sctp_set_time_now (my_thread_index);
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         sctp_header_t *sctp_hdr = 0;
+         sctp_connection_t *tc0;
+         sctp_tx_trace_t *t0;
+         sctp_header_t *th0 = 0;
+         u32 error0 = SCTP_ERROR_PKTS_SENT, next0 =
+           SCTP_OUTPUT_NEXT_IP_LOOKUP;
+
+#if SCTP_DEBUG_STATE_MACHINE
+         u16 packet_length = 0;
+#endif
+
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         tc0 = sctp_connection_get (vnet_buffer (b0)->sctp.connection_index,
+                                    my_thread_index);
+
+         if (PREDICT_FALSE (tc0 == 0))
+           {
+             error0 = SCTP_ERROR_INVALID_CONNECTION;
+             next0 = SCTP_OUTPUT_NEXT_DROP;
+             goto done;
+           }
+
+         u8 idx = sctp_pick_conn_idx_on_state (tc0->state);
+
+         th0 = vlib_buffer_get_current (b0);
+
+         if (is_ip4)
+           {
+             ip4_header_t *th0 = vlib_buffer_push_ip4 (vm,
+                                                       b0,
+                                                       &tc0->sub_conn
+                                                       [idx].connection.
+                                                       lcl_ip.ip4,
+                                                       &tc0->
+                                                       sub_conn
+                                                       [idx].connection.
+                                                       rmt_ip.ip4,
+                                                       IP_PROTOCOL_SCTP, 1);
+
+             u32 checksum = ip4_sctp_compute_checksum (vm, b0, th0);
+
+             sctp_hdr = ip4_next_header (th0);
+             sctp_hdr->checksum = checksum;
+
+             vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+             th0->checksum = 0;
+
+#if SCTP_DEBUG_STATE_MACHINE
+             packet_length = clib_net_to_host_u16 (th0->length);
+#endif
+           }
+         else
+           {
+             ip6_header_t *ih0;
+             ih0 = vlib_buffer_push_ip6 (vm,
+                                         b0,
+                                         &tc0->sub_conn[idx].
+                                         connection.lcl_ip.ip6,
+                                         &tc0->sub_conn[idx].
+                                         connection.rmt_ip.ip6,
+                                         IP_PROTOCOL_SCTP);
+
+             int bogus = ~0;
+             u32 checksum = ip6_sctp_compute_checksum (vm, b0, ih0, &bogus);
+             ASSERT (!bogus);
+
+             sctp_hdr = ip6_next_header (ih0);
+             sctp_hdr->checksum = checksum;
+
+             vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data;
+             vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data;
+             th0->checksum = 0;
+
+#if SCTP_DEBUG_STATE_MACHINE
+             packet_length = clib_net_to_host_u16 (ih0->payload_length);
+#endif
+           }
+
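+         /* Sanity-check that the ports in the outgoing SCTP header match
+          * the sub-connection's local/remote ports (in either direction) */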
+         u8 is_valid =
+           (tc0->sub_conn[idx].connection.lcl_port ==
+            sctp_hdr->src_port
+            || tc0->sub_conn[idx].connection.lcl_port ==
+            sctp_hdr->dst_port)
+           && (tc0->sub_conn[idx].connection.rmt_port ==
+               sctp_hdr->dst_port
+               || tc0->sub_conn[idx].connection.rmt_port ==
+               sctp_hdr->src_port);
+
+         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
+         u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr);
+
+         if (!is_valid)
+           {
+             SCTP_DBG_STATE_MACHINE ("BUFFER IS INCORRECT: conn_index = %u, "
+                                     "packet_length = %u, "
+                                     "chunk_type = %u [%s], "
+                                     "connection.lcl_port = %u, sctp_hdr->src_port = %u, "
+                                     "connection.rmt_port = %u, sctp_hdr->dst_port = %u",
+                                     tc0->sub_conn
+                                     [idx].connection.c_index, packet_length,
+                                     chunk_type,
+                                     sctp_chunk_to_string (chunk_type),
+                                     tc0->sub_conn[idx].connection.lcl_port,
+                                     sctp_hdr->src_port,
+                                     tc0->sub_conn[idx].connection.rmt_port,
+                                     sctp_hdr->dst_port);
+
+             error0 = SCTP_ERROR_UNKOWN_CHUNK;
+             next0 = SCTP_OUTPUT_NEXT_DROP;
+             goto done;
+           }
+
+         SCTP_DBG_STATE_MACHINE
+           ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), "
+            "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u",
+            tc0->sub_conn[idx].connection.c_index,
+            tc0->state, sctp_state_to_string (tc0->state),
+            sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port,
+            full_hdr->hdr.dst_port);
+
+         if (chunk_type == DATA)
+           SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length);
+
+         /* Let's make sure the state-machine does not send anything crazy */
+         switch (tc0->state)
+           {
+           case SCTP_STATE_CLOSED:
+             {
+               if (chunk_type != INIT && chunk_type != INIT_ACK)
+                 {
+                   SCTP_DBG_STATE_MACHINE
+                     ("Sending the wrong chunk (%s) based on state-machine status (%s)",
+                      sctp_chunk_to_string (chunk_type),
+                      sctp_state_to_string (tc0->state));
+
+                   error0 = SCTP_ERROR_UNKOWN_CHUNK;
+                   next0 = SCTP_OUTPUT_NEXT_DROP;
+                   goto done;
+                 }
+               break;
+             }
+           case SCTP_STATE_ESTABLISHED:
+             if (chunk_type != DATA && chunk_type != HEARTBEAT &&
+                 chunk_type != HEARTBEAT_ACK && chunk_type != SACK &&
+                 chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN)
+               {
+                 SCTP_DBG_STATE_MACHINE
+                   ("Sending the wrong chunk (%s) based on state-machine status (%s)",
+                    sctp_chunk_to_string (chunk_type),
+                    sctp_state_to_string (tc0->state));
+
+                 error0 = SCTP_ERROR_UNKOWN_CHUNK;
+                 next0 = SCTP_OUTPUT_NEXT_DROP;
+                 goto done;
+               }
+             break;
+           case SCTP_STATE_COOKIE_WAIT:
+             if (chunk_type != COOKIE_ECHO)
+               {
+                 SCTP_DBG_STATE_MACHINE
+                   ("Sending the wrong chunk (%s) based on state-machine status (%s)",
+                    sctp_chunk_to_string (chunk_type),
+                    sctp_state_to_string (tc0->state));
+
+                 error0 = SCTP_ERROR_UNKOWN_CHUNK;
+                 next0 = SCTP_OUTPUT_NEXT_DROP;
+                 goto done;
+               }
+             /* Change state */
+             tc0->state = SCTP_STATE_COOKIE_ECHOED;
+             break;
+           default:
+             SCTP_DBG_STATE_MACHINE
+               ("Sending chunk (%s) based on state-machine status (%s)",
+                sctp_chunk_to_string (chunk_type),
+                sctp_state_to_string (tc0->state));
+             break;
+           }
+
+         if (chunk_type == SHUTDOWN)
+           {
+             /* Start the SCTP_TIMER_T2_SHUTDOWN timer */
+             sctp_timer_set (tc0, idx, SCTP_TIMER_T2_SHUTDOWN,
+                             SCTP_RTO_INIT);
+             tc0->state = SCTP_STATE_SHUTDOWN_SENT;
+           }
+
+         vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0;
+         vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0;
+
+         b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED;
+
+         SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u, "
+                                 "NEW_STATE = %s, "
+                                 "CHUNK_SENT = %s",
+                                 tc0->sub_conn[idx].connection.c_index,
+                                 sctp_state_to_string (tc0->state),
+                                 sctp_chunk_to_string (chunk_type));
+
+         vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr);
+
+       done:
+         b0->error = node->errors[error0];
+         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+           {
+             t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
+             if (th0)
+               {
+                 clib_memcpy (&t0->sctp_header, th0,
+                              sizeof (t0->sctp_header));
+               }
+             else
+               {
+                 memset (&t0->sctp_header, 0, sizeof (t0->sctp_header));
+               }
+             clib_memcpy (&t0->sctp_connection, tc0,
+                          sizeof (t0->sctp_connection));
+           }
+
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next,
+                                          n_left_to_next, bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  return from_frame->n_vectors;
+}
+
+static uword
+sctp4_output (vlib_main_t * vm, vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  return sctp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ );
+}
+
+static uword
+sctp6_output (vlib_main_t * vm, vlib_node_runtime_t * node,
+             vlib_frame_t * from_frame)
+{
+  return sctp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ );
+}
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp4_output_node) =
+{
+  .function = sctp4_output,
+  .name = "sctp4-output",
+  /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_OUTPUT_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n,
+    foreach_sctp4_output_next
+#undef _
+  },
+  .format_buffer = format_sctp_header,
+  .format_trace = format_sctp_tx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp4_output_node, sctp4_output);
+
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (sctp6_output_node) =
+{
+  .function = sctp6_output,
+  .name = "sctp6-output",
+    /* Takes a vector of packets. */
+  .vector_size = sizeof (u32),
+  .n_errors = SCTP_N_ERROR,
+  .error_strings = sctp_error_strings,
+  .n_next_nodes = SCTP_OUTPUT_N_NEXT,
+  .next_nodes = {
+#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n,
+    foreach_sctp6_output_next
+#undef _
+  },
+  .format_buffer = format_sctp_header,
+  .format_trace = format_sctp_tx_trace,
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (sctp6_output_node, sctp6_output);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp_packet.h b/src/vnet/sctp/sctp_packet.h
new file mode 100644 (file)
index 0000000..4c358db
--- /dev/null
@@ -0,0 +1,1445 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_sctp_packet_h
+#define included_vnet_sctp_packet_h
+
+#include <stdbool.h>
+
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+
+/*
+ * As per RFC 4960
+ * https://tools.ietf.org/html/rfc4960
+ */
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |     Source Port Number        |     Destination Port Number   |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Verification Tag                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           Checksum                            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  /*
+   * This is the SCTP sender's port number. It can be used by the
+   * receiver in combination with the source IP address, the SCTP
+   * destination port, and possibly the destination IP address to
+   * identify the association to which this packet belongs.
+   * The port number 0 MUST NOT be used.
+   */
+  u16 src_port;
+
+  /*
+   * This is the SCTP port number to which this packet is destined.
+   * The receiving host will use this port number to de-multiplex the
+   * SCTP packet to the correct receiving endpoint/application.
+   * The port number 0 MUST NOT be used.
+   */
+  u16 dst_port;
+
+  /*
+   * The receiver of this packet uses the Verification Tag to validate
+   * the sender of this SCTP packet.  On transmit, the value of this
+   * Verification Tag MUST be set to the value of the Initiate Tag
+   * received from the peer endpoint during the association
+   * initialization, with the following exceptions:
+   * - A packet containing an INIT chunk MUST have a zero Verification
+   *   Tag.
+   * - A packet containing a SHUTDOWN COMPLETE chunk with the T bit
+   *   set MUST have the Verification Tag copied from the packet with
+   *   the SHUTDOWN ACK chunk.
+   * - A packet containing an ABORT chunk may have the verification tag
+   *   copied from the packet that caused the ABORT to be sent.
+   * An INIT chunk MUST be the only chunk in the SCTP packet carrying it.
+   */
+  u32 verification_tag;
+
+  /*
+   * This field contains the checksum of this SCTP packet.
+   * SCTP uses the CRC32c algorithm.
+   */
+  u32 checksum;
+
+} sctp_header_t;
+
+always_inline void
+vnet_set_sctp_src_port (sctp_header_t * h, u16 src_port)
+{
+  h->src_port = clib_host_to_net_u16 (src_port);
+}
+
+always_inline u16
+vnet_get_sctp_src_port (sctp_header_t * h)
+{
+  return (clib_net_to_host_u16 (h->src_port));
+}
+
+always_inline void
+vnet_set_sctp_dst_port (sctp_header_t * h, u16 dst_port)
+{
+  h->dst_port = clib_host_to_net_u16 (dst_port);
+}
+
+always_inline u16
+vnet_get_sctp_dst_port (sctp_header_t * h)
+{
+  return (clib_net_to_host_u16 (h->dst_port));
+}
+
+always_inline void
+vnet_set_sctp_verification_tag (sctp_header_t * h, u32 verification_tag)
+{
+  h->verification_tag = clib_host_to_net_u32 (verification_tag);
+}
+
+always_inline u32
+vnet_get_sctp_verification_tag (sctp_header_t * h)
+{
+  return (clib_net_to_host_u32 (h->verification_tag));
+}
+
+always_inline void
+vnet_set_sctp_checksum (sctp_header_t * h, u32 checksum)
+{
+  h->checksum = clib_host_to_net_u32 (checksum);
+}
+
+always_inline u32
+vnet_get_sctp_checksum (sctp_header_t * h)
+{
+  return (clib_net_to_host_u32 (h->checksum));
+}
+
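+/*
+ * Example usage of the accessors above (an illustrative sketch only; "hdr"
+ * and "peer_initiate_tag" are placeholders for the caller's own values):
+ *
+ *   sctp_header_t *hdr = ...;
+ *   vnet_set_sctp_src_port (hdr, 5001);
+ *   vnet_set_sctp_dst_port (hdr, 5002);
+ *   vnet_set_sctp_verification_tag (hdr, peer_initiate_tag);
+ *   vnet_set_sctp_checksum (hdr, 0);   (the CRC32c value is filled in separately)
+ *   ASSERT (vnet_get_sctp_src_port (hdr) == 5001);
+ */
+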
+/*
+ * Multiple chunks can be bundled into one SCTP packet up to the MTU
+ * size, except for the INIT, INIT ACK, and SHUTDOWN COMPLETE chunks.
+ * These chunks MUST NOT be bundled with any other chunk in a packet.
+ *
+ *
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        Common Header                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                          Chunk #1                             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           ...                                 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                          Chunk #n                             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+
+typedef enum
+{
+  DATA = 0,
+  INIT,
+  INIT_ACK,
+  SACK,
+  HEARTBEAT,
+  HEARTBEAT_ACK,
+  ABORT,
+  SHUTDOWN,
+  SHUTDOWN_ACK,
+  OPERATION_ERROR,
+  COOKIE_ECHO,
+  COOKIE_ACK,
+  ECNE,
+  CWR,
+  SHUTDOWN_COMPLETE
+} sctp_chunk_type;
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Chunk Type  | Chunk  Flags  |        Chunk Length           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  /*
+   * This field identifies the type of information contained in the
+   * Chunk Value field. It takes a value from 0 to 254.
+   * The value of 255 is reserved for future use as an extension field.
+   *
+   * The values of Chunk Types are defined as follows:
+   * ID Value    Chunk Type
+   * -----       ----------
+   *  0          - Payload Data (DATA)
+   *  1          - Initiation (INIT)
+   *  2          - Initiation Acknowledgement (INIT ACK)
+   *  3          - Selective Acknowledgement (SACK)
+   *  4          - Heartbeat Request (HEARTBEAT)
+   *  5          - Heartbeat Acknowledgement (HEARTBEAT ACK)
+   *  6          - Abort (ABORT)
+   *  7          - Shutdown (SHUTDOWN)
+   *  8          - Shutdown Acknowledgement (SHUTDOWN ACK)
+   *  9          - Operation Error (ERROR)
+   *  10         - State Cookie (COOKIE ECHO)
+   *  11         - Cookie Acknowledgement (COOKIE ACK)
+   *  12         - Reserved for Explicit Congestion Notification Echo (ECNE)
+   *  13         - Reserved for Congestion Window Reduced (CWR)
+   *  14         - Shutdown Complete (SHUTDOWN COMPLETE)
+   *  15 to 62   - available
+   *  63         - reserved for IETF-defined Chunk Extensions
+   *  64 to 126  - available
+   *  127        - reserved for IETF-defined Chunk Extensions
+   *  128 to 190 - available
+   *  191        - reserved for IETF-defined Chunk Extensions
+   *  192 to 254 - available
+   *  255        - reserved for IETF-defined Chunk Extensions
+   *
+   *  Chunk Types are encoded such that the highest-order 2 bits specify
+   *  the action that must be taken if the processing endpoint does not
+   *  recognize the Chunk Type.
+   *  00 -  Stop processing this SCTP packet and discard it, do not
+   *  process any further chunks within it.
+   *  01 -  Stop processing this SCTP packet and discard it, do not
+   *  process any further chunks within it, and report the
+   *  unrecognized chunk in an 'Unrecognized Chunk Type' error cause.
+   *  10 -  Skip this chunk and continue processing.
+   *  11 -  Skip this chunk and continue processing, but report in an
+   *  ERROR chunk using the 'Unrecognized Chunk Type' cause of error.
+   *
+   *  Note: The ECNE and CWR chunk types are reserved for future use of
+   *  Explicit Congestion Notification (ECN);
+   */
+  //u8 type;
+
+  /*
+   * The usage of these bits depends on the Chunk type as given by the
+   * Chunk Type field.  Unless otherwise specified, they are set to 0 on
+   * transmit and are ignored on receipt.
+   */
+  //u8 flags;
+
+  /*
+   * This value represents the size of the chunk in bytes, including
+   * the Chunk Type, Chunk Flags, Chunk Length, and Chunk Value fields.
+   * Therefore, if the Chunk Value field is zero-length, the Length
+   * field will be set to 4.
+   * The Chunk Length field does not count any chunk padding.
+   * Chunks (including Type, Length, and Value fields) are padded out
+   * by the sender with all zero bytes to be a multiple of 4 bytes
+   * long. This padding MUST NOT be more than 3 bytes in total. The
+   * Chunk Length value does not include terminating padding of the
+   * chunk. However, it does include padding of any variable-length
+   * parameter except the last parameter in the chunk. The receiver
+   * MUST ignore the padding.
+   *
+   * Note: A robust implementation should accept the chunk whether or
+   * not the final padding has been included in the Chunk Length.
+   */
+  //u16 length;
+
+  u32 params;
+
+} sctp_chunks_common_hdr_t;
+
+typedef struct
+{
+  sctp_header_t hdr;
+  sctp_chunks_common_hdr_t common_hdr;
+
+} sctp_full_hdr_t;
+
+#define CHUNK_TYPE_MASK 0xFF000000
+#define CHUNK_TYPE_SHIFT 24
+
+#define CHUNK_FLAGS_MASK 0x00FF0000
+#define CHUNK_FLAGS_SHIFT 16
+
+#define CHUNK_LENGTH_MASK 0x0000FFFF
+#define CHUNK_LENGTH_SHIFT 0
+
+always_inline void
+vnet_sctp_common_hdr_params_host_to_net (sctp_chunks_common_hdr_t * h)
+{
+  h->params = clib_host_to_net_u32 (h->params);
+}
+
+always_inline void
+vnet_sctp_common_hdr_params_net_to_host (sctp_chunks_common_hdr_t * h)
+{
+  h->params = clib_net_to_host_u32 (h->params);
+}
+
+always_inline void
+vnet_sctp_set_chunk_type (sctp_chunks_common_hdr_t * h, sctp_chunk_type t)
+{
+  h->params &= ~(CHUNK_TYPE_MASK);
+  h->params |= (t << CHUNK_TYPE_SHIFT) & CHUNK_TYPE_MASK;
+}
+
+always_inline u8
+vnet_sctp_get_chunk_type (sctp_chunks_common_hdr_t * h)
+{
+  return ((h->params & CHUNK_TYPE_MASK) >> CHUNK_TYPE_SHIFT);
+}
+
+always_inline void
+vnet_sctp_set_chunk_length (sctp_chunks_common_hdr_t * h, u16 length)
+{
+  h->params &= ~(CHUNK_LENGTH_MASK);
+  h->params |= (length << CHUNK_LENGTH_SHIFT) & CHUNK_LENGTH_MASK;
+}
+
+always_inline u16
+vnet_sctp_get_chunk_length (sctp_chunks_common_hdr_t * h)
+{
+  return ((h->params & CHUNK_LENGTH_MASK) >> CHUNK_LENGTH_SHIFT);
+}
+
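+/*
+ * Example (illustrative sketch): chunk type, flags and length share the
+ * single 32-bit "params" word above, so fields are set in host order and
+ * the whole word is byte-swapped once before transmission:
+ *
+ *   sctp_chunks_common_hdr_t hdr = { 0 };
+ *   vnet_sctp_set_chunk_type (&hdr, DATA);
+ *   vnet_sctp_set_chunk_length (&hdr, 16);
+ *   vnet_sctp_common_hdr_params_host_to_net (&hdr);   (wire order)
+ *   vnet_sctp_common_hdr_params_net_to_host (&hdr);   (host order again)
+ *   ASSERT (vnet_sctp_get_chunk_type (&hdr) == DATA);
+ *   ASSERT (vnet_sctp_get_chunk_length (&hdr) == 16);
+ */
+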
+/*
+ * Payload chunk
+ *
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 0    | Reserved|U|B|E|    Length                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                              TSN                              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |      Stream Identifier S      |   Stream Sequence Number n    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                  Payload Protocol Identifier                  |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /                 User Data (seq n of Stream S)                 /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  /*
+   * Type (8 bits): 0
+   * Flags (8 bits):
+   * -- Reserved (5 bits): all 0s
+   * -- U bit
+   * -- B bit
+   * -- E bit
+   * Length (16 bits): This field indicates the length of the DATA chunk in
+   * bytes from the beginning of the type field to the end of the User Data
+   * field excluding any padding.
+   * A DATA chunk with one byte of user data will have Length set to 17
+   * (indicating 17 bytes). A DATA chunk with a User Data field of length L
+   * will have the Length field set to (16 + L) (indicating 16+L bytes) where
+   * L MUST be greater than 0.
+   */
+
+  /*
+   * Fragment Description Table:
+   *
+   *    B E                  Description
+   * ============================================================
+   * |  1 0 | First piece of a fragmented user message          |
+   * +----------------------------------------------------------+
+   * |  0 0 | Middle piece of a fragmented user message         |
+   * +----------------------------------------------------------+
+   * |  0 1 | Last piece of a fragmented user message           |
+   * +----------------------------------------------------------+
+   * |  1 1 | Unfragmented message                              |
+   * ============================================================
+   */
+  sctp_chunks_common_hdr_t chunk_hdr;
+
+  /*
+   * This value represents the TSN for this DATA chunk.
+   * The valid range of TSN is from 0 to 4294967295 (2**32 - 1).
+   * TSN wraps back to 0 after reaching 4294967295.
+   */
+  u32 tsn;
+
+  /*
+   * Identifies the stream to which the following user data belongs.
+   */
+  u16 stream_id;
+
+  /*
+   * This value represents the Stream Sequence Number of the following user data
+   * within the stream S. Valid range is 0 to 65535.
+   * When a user message is fragmented by SCTP for transport, the same Stream
+   * Sequence Number MUST be carried in each of the fragments of the message.
+   */
+  u16 stream_seq;
+
+  /*
+   * This value represents an application (or upper layer) specified protocol
+   * identifier. This value is passed to SCTP by its upper layer and sent to its
+   * peer. This identifier is not used by SCTP but can be used by certain network
+   * entities, as well as by the peer application, to identify the type of
+   * information being carried in this DATA chunk. This field must be sent even
+   * in fragmented DATA chunks (to make sure it is available for agents in the
+   * middle of the network).  Note that this field is NOT touched by an SCTP
+   * implementation; therefore, its byte order is NOT necessarily big endian.
+   * The upper layer is responsible for any byte order conversions to this field.
+   * The value 0 indicates that no application identifier is specified by the
+   * upper layer for this payload data.
+   */
+  u32 payload_id;
+
+  /*
+   * This is the payload user data. The implementation MUST pad the end of the
+   * data to a 4-byte boundary with all-zero bytes. Any padding MUST NOT be
+   * included in the Length field. A sender MUST never add more than 3 bytes of
+   * padding.
+   */
+  u32 data[];
+
+} sctp_payload_data_chunk_t;
+
+always_inline void
+vnet_sctp_set_ebit (sctp_payload_data_chunk_t * p, u8 enable)
+{
+  //p->chunk_hdr.flags = clib_host_to_net_u16 (enable);
+}
+
+always_inline u8
+vnet_sctp_get_ebit (sctp_payload_data_chunk_t * p)
+{
+  //return (clib_net_to_host_u16 (p->chunk_hdr.flags));
+  return 0;
+}
+
+always_inline void
+vnet_sctp_set_bbit (sctp_payload_data_chunk_t * p, u8 enable)
+{
+  //p->chunk_hdr.flags = clib_host_to_net_u16 (enable << 1);
+}
+
+always_inline u8
+vnet_sctp_get_bbit (sctp_payload_data_chunk_t * p)
+{
+  //return (clib_net_to_host_u16 (p->chunk_hdr.flags >> 1));
+  return 0;
+}
+
+always_inline void
+vnet_sctp_set_ubit (sctp_payload_data_chunk_t * p, u8 enable)
+{
+  //p->chunk_hdr.flags = clib_host_to_net_u16 (enable << 2);
+}
+
+always_inline u8
+vnet_sctp_get_ubit (sctp_payload_data_chunk_t * p)
+{
+  //return (clib_net_to_host_u16 (p->chunk_hdr.flags >> 2));
+  return 0;
+}
+
+always_inline void
+vnet_sctp_set_tsn (sctp_payload_data_chunk_t * p, u32 tsn)
+{
+  p->tsn = clib_host_to_net_u32 (tsn);
+}
+
+always_inline u32
+vnet_sctp_get_tsn (sctp_payload_data_chunk_t * p)
+{
+  return (clib_net_to_host_u32 (p->tsn));
+}
+
+always_inline void
+vnet_sctp_set_stream_id (sctp_payload_data_chunk_t * p, u16 stream_id)
+{
+  p->stream_id = clib_host_to_net_u16 (stream_id);
+}
+
+always_inline u16
+vnet_sctp_get_stream_id (sctp_payload_data_chunk_t * p)
+{
+  return (clib_net_to_host_u16 (p->stream_id));
+}
+
+always_inline void
+vnet_sctp_set_stream_seq (sctp_payload_data_chunk_t * p, u16 stream_seq)
+{
+  p->stream_seq = clib_host_to_net_u16 (stream_seq);
+}
+
+always_inline u16
+vnet_sctp_get_stream_seq (sctp_payload_data_chunk_t * p)
+{
+  return (clib_net_to_host_u16 (p->stream_seq));
+}
+
+always_inline void
+vnet_sctp_set_payload_id (sctp_payload_data_chunk_t * p, u32 payload_id)
+{
+  p->payload_id = clib_host_to_net_u32 (payload_id);
+}
+
+always_inline u32
+vnet_sctp_get_payload_id (sctp_payload_data_chunk_t * p)
+{
+  return (clib_net_to_host_u32 (p->payload_id));
+}
+
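+/*
+ * Putting the DATA-chunk helpers together (illustrative sketch; "data_chunk",
+ * "data_len", "next_tsn" and "next_ssn" are placeholders for caller state):
+ *
+ *   vnet_sctp_set_chunk_type (&data_chunk->chunk_hdr, DATA);
+ *   vnet_sctp_set_chunk_length (&data_chunk->chunk_hdr, 16 + data_len);
+ *   vnet_sctp_set_tsn (data_chunk, next_tsn);
+ *   vnet_sctp_set_stream_id (data_chunk, 0);
+ *   vnet_sctp_set_stream_seq (data_chunk, next_ssn);
+ *   vnet_sctp_set_payload_id (data_chunk, 0);
+ *
+ * The chunk length is (16 + L) as per the Length description above; padding
+ * bytes, if any, are not counted.
+ */
+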
+always_inline u16
+vnet_sctp_calculate_padding (u16 base_length)
+{
+  if (base_length % 4 == 0)
+    return 0;
+
+  return (4 - base_length % 4);
+}
+
+always_inline u16
+vnet_sctp_calculate_payload_data_padding (sctp_payload_data_chunk_t * p)
+{
+  u16 payload_length = vnet_sctp_get_chunk_length (&p->chunk_hdr) -
+    sizeof (p->chunk_hdr) -
+    sizeof (p->tsn) -
+    sizeof (p->stream_id) - sizeof (p->stream_seq) - sizeof (p->payload_id);
+
+  return vnet_sctp_calculate_padding (payload_length);
+}
+
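+/*
+ * Worked example for the padding helpers above: a DATA chunk carrying 5
+ * bytes of user data has a chunk length of 16 + 5 = 21 bytes, so
+ * vnet_sctp_calculate_padding (21) returns 3 and the chunk occupies 24
+ * bytes on the wire; a 4-byte payload (length 20) needs no padding.
+ */
+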
+#define DEFAULT_A_RWND 1480
+#define INBOUND_STREAMS_COUNT 1
+#define OUTBOUND_STREAMS_COUNT 1
+
+/*
+ * INIT chunk
+ *
+ * This chunk is used to initiate an SCTP association between two
+ * endpoints.
+ *
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 1    |  Chunk Flags  |      Chunk Length             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         Initiate Tag                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           Advertised Receiver Window Credit (a_rwnd)          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  Number of Outbound Streams   |  Number of Inbound Streams    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                          Initial TSN                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /              Optional/Variable-Length Parameters              /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * The INIT chunk contains the following parameters. Unless otherwise
+ * noted, each parameter MUST only be included once in the INIT chunk.
+ *
+ * Fixed Parameters                     Status
+ * ----------------------------------------------
+ *  Initiate Tag                        Mandatory
+ *  Advertised Receiver Window Credit   Mandatory
+ *  Number of Outbound Streams          Mandatory
+ *  Number of Inbound Streams           Mandatory
+ *  Initial TSN                         Mandatory
+ *
+ * Variable Parameters                  Status     Type Value
+ * -------------------------------------------------------------
+ *  IPv4 Address (Note 1)               Optional    5
+ *  IPv6 Address (Note 1)               Optional    6
+ *  Cookie Preservative                 Optional    9
+ *  Reserved for ECN Capable (Note 2)   Optional    32768 (0x8000)
+ *  Host Name Address (Note 3)          Optional    11
+ *  Supported Address Types (Note 4)    Optional    12
+ *
+ * Note 1: The INIT chunks can contain multiple addresses that can be
+ * IPv4 and/or IPv6 in any combination.
+ *
+ * Note 2: The ECN Capable field is reserved for future use of Explicit
+ * Congestion Notification.
+ *
+ * Note 3: An INIT chunk MUST NOT contain more than one Host Name Address
+ * parameter. Moreover, the sender of the INIT MUST NOT combine any other
+ * address types with the Host Name Address in the INIT. The receiver of
+ * INIT MUST ignore any other address types if the Host Name Address parameter
+ * is present in the received INIT chunk.
+ *
+ * Note 4: This parameter, when present, specifies all the address types the
+ * sending endpoint can support.  The absence of this parameter indicates that
+ * the sending endpoint can support any address type.
+ *
+ * IMPLEMENTATION NOTE: If an INIT chunk is received with known parameters that
+ * are not optional parameters of the INIT chunk, then the receiver SHOULD
+ * process the INIT chunk and send back an INIT ACK. The receiver of the INIT
+ * chunk MAY bundle an ERROR chunk with the COOKIE ACK chunk later.
+ * However, restrictive implementations MAY send back an ABORT chunk in response
+ * to the INIT chunk. The Chunk Flags field in INIT is reserved, and all bits
+ * in it should be set to 0 by the sender and ignored by the receiver.
+ * The sequence of parameters within an INIT can be processed in any order.
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+
+  /*
+   * The receiver of the INIT (the responding end) records the value of
+   * the Initiate Tag parameter.
+   * This value MUST be placed into the Verification Tag field of every
+   * SCTP packet that the receiver of the INIT transmits within this association.
+   * The Initiate Tag is allowed to have any value except 0.
+   *
+   * If the value of the Initiate Tag in a received INIT chunk is found
+   * to be 0, the receiver MUST treat it as an error and close the
+   * association by transmitting an ABORT.
+   *
+   * The value of the INIT TAG is recommended to be random for security
+   * reasons. A good method is described in https://tools.ietf.org/html/rfc4086
+   */
+  u32 initiate_tag;
+
+  /*
+   * This value represents the dedicated buffer space, in number of bytes,
+   * the sender of the INIT has reserved in association with this window.
+   * During the life of the association, this buffer space SHOULD NOT be
+   * lessened (i.e., dedicated buffers taken away from this association);
+   * however, an endpoint MAY change the value of a_rwnd it sends in SACK
+   * chunks.
+   */
+  u32 a_rwnd;
+
+  /*
+   * Defines the number of outbound streams the sender of this INIT chunk
+   * wishes to create in this association.
+   * The value of 0 MUST NOT be used.
+   *
+   * Note: A receiver of an INIT with the OS value set to 0 SHOULD abort
+   * the association.
+   */
+  u16 outbound_streams_count;
+
+  /*
+   * Defines the maximum number of streams the sender of this INIT
+   * chunk allows the peer end to create in this association.
+   * The value 0 MUST NOT be used.
+   *
+   * Note: There is no negotiation of the actual number of streams but
+   * instead the two endpoints will use the min(requested, offered).
+   *
+   * Note: A receiver of an INIT with the MIS value of 0 SHOULD abort
+   * the association.
+   */
+  u16 inboud_streams_count;
+
+  /*
+   * Defines the initial TSN that the sender will use.
+   * The valid range is from 0 to 4294967295.
+   * This field MAY be set to the value of the Initiate Tag field.
+   */
+  u32 initial_tsn;
+
+  /* The following field allows for multiple optional fields, which are:
+   * - sctp_ipv4_address
+   * - sctp_ipv6_address
+   * - sctp_cookie_preservative
+   * - sctp_hostname_address
+   * - sctp_supported_address_types
+   */
+  u32 optional_fields[];
+
+} sctp_init_chunk_t;
+
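+/*
+ * Minimal sketch of filling the fixed part of an INIT chunk with the
+ * defaults above ("init", "local_tag" and "initial_tsn" are placeholders;
+ * optional/variable-length parameters are appended separately):
+ *
+ *   vnet_sctp_set_chunk_type (&init->chunk_hdr, INIT);
+ *   init->initiate_tag = clib_host_to_net_u32 (local_tag);   (random, non-zero)
+ *   init->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND);
+ *   init->outbound_streams_count = clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT);
+ *   init->inboud_streams_count = clib_host_to_net_u16 (INBOUND_STREAMS_COUNT);
+ *   init->initial_tsn = clib_host_to_net_u32 (initial_tsn);
+ */
+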
+/*
+ * INIT ACK chunk
+ *
+ * The INIT ACK chunk is used to acknowledge the initiation of an SCTP
+ * association. The parameter part of INIT ACK is formatted similarly to the
+ * INIT chunk.
+ *
+ * It uses two extra variable parameters:
+ * - the State Cookie and
+ * - the Unrecognized Parameter:
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 2    |  Chunk Flags  |      Chunk Length             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                         Initiate Tag                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |              Advertised Receiver Window Credit                |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  Number of Outbound Streams   |  Number of Inbound Streams    |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                          Initial TSN                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /              Optional/Variable-Length Parameters              /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef sctp_init_chunk_t sctp_init_ack_chunk_t;
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Parameter Type       |       Parameter Length        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /                       Parameter Value                         /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  u16 type;
+  u16 length;
+
+} sctp_opt_params_hdr_t;
+
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  u64 mac;                     /* RFC 2104 */
+  u32 creation_time;
+  u32 cookie_lifespan;
+
+} sctp_state_cookie_param_t;
+
+/*
+ *  This chunk is used only during the initialization of an association.
+ *  It is sent by the initiator of an association to its peer to complete
+ *  the initialization process.  This chunk MUST precede any DATA chunk
+ *  sent within the association, but MAY be bundled with one or more DATA
+ *  chunks in the same packet.
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  |   Type = 10   |Chunk  Flags   |         Length                |
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *  /                     Cookie                                    /
+ *  \                                                               \
+ *  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+
+  sctp_state_cookie_param_t cookie;
+
+} sctp_cookie_echo_chunk_t;
+
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 11   |Chunk  Flags   |     Length = 4                |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+
+} sctp_cookie_ack_chunk_t;
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 14   |Chunk  Flags   |     Length = 4                |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+
+} sctp_shutdown_complete_chunk_t;
+
+/* OPTIONAL or VARIABLE-LENGTH parameters for INIT */
+#define SCTP_IPV4_ADDRESS_TYPE 5
+#define SCTP_IPV4_ADDRESS_TYPE_LENGTH 8
+#define SCTP_IPV6_ADDRESS_TYPE 6
+#define SCTP_IPV6_ADDRESS_TYPE_LENGTH 20
+#define SCTP_STATE_COOKIE_TYPE         7
+#define SCTP_UNRECOGNIZED_TYPE 8
+#define SCTP_COOKIE_PRESERVATIVE_TYPE  9
+#define SCTP_COOKIE_PRESERVATIVE_TYPE_LENGTH   8
+#define SCTP_HOSTNAME_ADDRESS_TYPE     11
+#define SCTP_SUPPORTED_ADDRESS_TYPES   12
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |        Type = 5               |      Length = 8               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                        IPv4 Address                           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * Contains an IPv4 address of the sending endpoint.
+   * It is binary encoded.
+   */
+  ip4_address_t address;
+
+} sctp_ipv4_addr_param_t;
+
+always_inline void
+vnet_sctp_set_ipv4_address (sctp_ipv4_addr_param_t * a, ip4_address_t address)
+{
+  a->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE);
+  a->param_hdr.length = clib_host_to_net_u16 (8);
+  a->address.as_u32 = clib_host_to_net_u32 (address.as_u32);
+}
+
+always_inline u32
+vnet_sctp_get_ipv4_address (sctp_ipv4_addr_param_t * a)
+{
+  return (clib_net_to_host_u32 (a->address.as_u32));
+}
+
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |            Type = 6           |          Length = 20          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                                                               |
+ * |                         IPv6 Address                          |
+ * |                                                               |
+ * |                                                               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * Contains an IPv6 address of the sending endpoint.
+   * It is binary encoded.
+   */
+  ip6_address_t address;
+
+} sctp_ipv6_addr_param_t;
+
+always_inline void
+vnet_sctp_set_ipv6_address (sctp_ipv6_addr_param_t * a, ip6_address_t address)
+{
+  a->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE);
+  a->param_hdr.length = clib_host_to_net_u16 (20);
+  a->address.as_u64[0] = clib_host_to_net_u64 (address.as_u64[0]);
+  a->address.as_u64[1] = clib_host_to_net_u64 (address.as_u64[1]);
+}
+
+always_inline ip6_address_t
+vnet_sctp_get_ipv6_address (sctp_ipv6_addr_param_t * a)
+{
+  ip6_address_t ip6_address;
+
+  ip6_address.as_u64[0] = clib_net_to_host_u64 (a->address.as_u64[0]);
+  ip6_address.as_u64[1] = clib_net_to_host_u64 (a->address.as_u64[1]);
+
+  return ip6_address;
+}
+
+/*
+ * The sender of the INIT shall use this parameter to suggest to the
+ * receiver of the INIT a longer life-span for the State Cookie.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Type = 9             |          Length = 8           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |         Suggested Cookie Life-Span Increment (msec.)          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * This parameter indicates to the receiver how much increment in
+   * milliseconds the sender wishes the receiver to add to its default
+   * cookie life-span.
+   *
+   * This optional parameter should be added to the INIT chunk by the
+   * sender when it reattempts establishing an association with a peer
+   * to which its previous attempt of establishing the association
+   * failed due to a stale cookie operation error. The receiver MAY
+   * choose to ignore the suggested cookie life-span increase for its
+   * own security reasons.
+   */
+  u32 life_span_inc;
+
+} sctp_cookie_preservative_param_t;
+
+always_inline void
+vnet_sctp_set_cookie_preservative (sctp_cookie_preservative_param_t * c,
+                                  u32 life_span_inc)
+{
+  c->param_hdr.type = clib_host_to_net_u16 (SCTP_COOKIE_PRESERVATIVE_TYPE);
+  c->param_hdr.length = clib_host_to_net_u16 (8);
+  c->life_span_inc = clib_host_to_net_u32 (life_span_inc);
+}
+
+always_inline u32
+vnet_sctp_get_cookie_preservative (sctp_cookie_preservative_param_t * c)
+{
+  return (clib_net_to_host_u32 (c->life_span_inc));
+}
+
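+/*
+ * Example (illustrative): when re-attempting an association after a stale
+ * cookie error, ask the peer to extend its default cookie life-span by
+ * two minutes:
+ *
+ *   sctp_cookie_preservative_param_t cookie_pres;
+ *   vnet_sctp_set_cookie_preservative (&cookie_pres, 120000);   (msec)
+ */
+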
+#define FQDN_MAX_LENGTH 256
+
+/*
+ * The sender of INIT uses this parameter to pass its Host Name (in
+ * place of its IP addresses) to its peer.
+ * The peer is responsible for resolving the name.
+ * Using this parameter might make it more likely for the association to work
+ * across a NAT box.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Type = 11            |          Length               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /                          Host Name                            /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+
+  /*
+   * This field contains a host name in "host name syntax" per RFC 1123
+   * Section 2.1
+   *
+   * Note: At least one null terminator is included in the Host Name
+   * string and must be included in the length.
+   */
+  char hostname[FQDN_MAX_LENGTH];
+
+} sctp_hostname_param_t;
+
+always_inline void
+vnet_sctp_set_hostname_address (sctp_hostname_param_t * h, char *hostname)
+{
+  h->param_hdr.type = clib_host_to_net_u16 (SCTP_HOSTNAME_ADDRESS_TYPE);
+  h->param_hdr.length =
+    clib_host_to_net_u16 (sizeof (sctp_opt_params_hdr_t) + FQDN_MAX_LENGTH);
+  memset (h->hostname, '\0', FQDN_MAX_LENGTH);
+  strncpy (h->hostname, hostname, FQDN_MAX_LENGTH - 1);
+}
+
+#define MAX_SUPPORTED_ADDRESS_TYPES    3
+
+/*
+ * The sender of INIT uses this parameter to list all the address types
+ * it can support.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Type = 12            |          Length               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |        Address Type #1        |        Address Type #2        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                            ......                             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-+-+-+-+-+-+-++-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  u16 address_type[MAX_SUPPORTED_ADDRESS_TYPES];
+
+} sctp_supported_addr_types_param_t;
+
+always_inline void
+vnet_sctp_set_supported_address_types (sctp_supported_addr_types_param_t * s)
+{
+  s->param_hdr.type = clib_host_to_net_u16 (SCTP_SUPPORTED_ADDRESS_TYPES);
+  s->param_hdr.length =
+    clib_host_to_net_u16 (4 /* base = type + length */ +
+                         MAX_SUPPORTED_ADDRESS_TYPES * 2);     /* each address type is 2 bytes (u16) */
+
+  s->address_type[0] = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE);
+  s->address_type[1] = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE);
+  s->address_type[2] = clib_host_to_net_u16 (SCTP_HOSTNAME_ADDRESS_TYPE);
+}
+
+/*
+ * Error cause codes to be used for the sctp_error_cause.cause_code field
+ */
+#define INVALID_STREAM_IDENTIFIER      1
+#define MISSING_MANDATORY_PARAMETER    2
+#define STALE_COOKIE_ERROR             3
+#define OUT_OF_RESOURCE                        4
+#define UNRESOLVABLE_ADDRESS           5
+#define UNRECOGNIZED_CHUNK_TYPE                6
+#define INVALID_MANDATORY_PARAMETER    7
+#define UNRECOGNIZED_PARAMETER         8
+#define NO_USER_DATA                   9
+#define COOKIE_RECEIVED_WHILE_SHUTTING_DOWN    10
+#define RESTART_OF_ASSOCIATION_WITH_NEW_ADDR   11
+#define USER_INITIATED_ABORT           12
+#define PROTOCOL_VIOLATION             13
+
+always_inline void
+vnet_sctp_set_state_cookie (sctp_state_cookie_param_t * s)
+{
+  s->param_hdr.type = clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE);
+
+  /* TODO: length & value to be populated */
+}
+
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  u32 value[];
+
+} sctp_unrecognized_param_t;
+
+always_inline void
+vnet_sctp_set_unrecognized_param (sctp_unrecognized_param_t * u)
+{
+  u->param_hdr.type = clib_host_to_net_u16 (UNRECOGNIZED_PARAMETER);
+
+  /* TODO: length & value to be populated */
+}
+
+/*
+ * Selective ACK (SACK) chunk
+ *
+ * This chunk is sent to the peer endpoint to acknowledge received DATA
+ * chunks and to inform the peer endpoint of gaps in the received
+ * subsequences of DATA chunks as represented by their TSNs.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 3    |Chunk  Flags   |      Chunk Length             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Cumulative TSN Ack                       |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |          Advertised Receiver Window Credit (a_rwnd)           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Number of Gap Ack Blocks = N  |  Number of Duplicate TSNs = X |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |  Gap Ack Block #1 Start       |   Gap Ack Block #1 End        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /                                                               /
+ * \                              ...                              \
+ * /                                                               /
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Gap Ack Block #N Start      |  Gap Ack Block #N End         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                       Duplicate TSN 1                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /                                                               /
+ * \                              ...                              \
+ * /                                                               /
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                       Duplicate TSN X                         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+  /*
+   * This parameter contains the TSN of the last DATA chunk received in
+   * sequence before a gap.  In the case where no DATA chunk has been
+   * received, this value is set to the peer's Initial TSN minus one.
+   */
+  u32 cumulative_tsn_ack;
+
+  /*
+   * This field indicates the updated receive buffer space in bytes of
+   * the sender of this SACK.
+   */
+  u32 a_rwnd;
+
+  /*
+   * Indicates the number of Gap Ack Blocks included in this SACK.
+   */
+  u16 gap_ack_blocks_count;
+
+  /*
+   * This field contains the number of duplicate TSNs the endpoint has
+   * received.  Each duplicate TSN is listed following the Gap Ack Block
+   * list.
+   */
+  u16 duplicate_tsn_count;
+
+  /*
+   * Indicates the Start offset TSN for this Gap Ack Block. To calculate
+   * the actual TSN number the Cumulative TSN Ack is added to this offset
+   * number. This calculated TSN identifies the first TSN in this Gap Ack
+   * Block that has been received.
+   */
+  u16 *gap_ack_block_start;
+
+  /*
+   * Indicates the End offset TSN for this Gap Ack Block. To calculate
+   * the actual TSN number, the Cumulative TSN Ack is added to this offset
+   * number. This calculated TSN identifies the TSN of the last DATA chunk
+   * received in this Gap Ack Block.
+   */
+  u16 *gap_ack_block_end;
+
+  /*
+   * Indicates the number of times a TSN was received in duplicate since
+   * the last SACK was sent. Every time a receiver gets a duplicate TSN
+   * (before sending the SACK), it adds it to the list of duplicates.
+   * The duplicate count is reinitialized to zero after sending each SACK.
+   */
+  u32 duplicate_tsn;
+
+} sctp_selective_ack_chunk_t;
+
+always_inline void
+vnet_sctp_set_cumulative_tsn_ack (sctp_selective_ack_chunk_t * s,
+                                 u32 cumulative_tsn_ack)
+{
+  vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK);
+  s->cumulative_tsn_ack = clib_host_to_net_u32 (cumulative_tsn_ack);
+}
+
+always_inline u32
+vnet_sctp_get_cumulative_tsn_ack (sctp_selective_ack_chunk_t * s)
+{
+  return clib_net_to_host_u32 (s->cumulative_tsn_ack);
+}
+
+always_inline void
+vnet_sctp_set_arwnd (sctp_selective_ack_chunk_t * s, u32 a_rwnd)
+{
+  vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK);
+  s->a_rwnd = clib_host_to_net_u32 (a_rwnd);
+}
+
+always_inline u32
+vnet_sctp_get_arwnd (sctp_selective_ack_chunk_t * s)
+{
+  return clib_net_to_host_u32 (s->a_rwnd);
+}
+
+always_inline void
+vnet_sctp_set_gap_ack_blocks_count (sctp_selective_ack_chunk_t * s,
+                                   u16 gap_ack_blocks_count)
+{
+  vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK);
+  s->gap_ack_blocks_count = clib_host_to_net_u16 (gap_ack_blocks_count);
+
+  if (s->gap_ack_block_start == NULL)
+    s->gap_ack_block_start =
+      clib_mem_alloc (sizeof (u16) * gap_ack_blocks_count);
+  if (s->gap_ack_block_end == NULL)
+    s->gap_ack_block_end =
+      clib_mem_alloc (sizeof (u16) * gap_ack_blocks_count);
+}
+
+always_inline u16
+vnet_sctp_get_gap_ack_blocks_count (sctp_selective_ack_chunk_t * s)
+{
+  return clib_net_to_host_u16 (s->gap_ack_blocks_count);
+}
+
+always_inline void
+vnet_sctp_set_duplicate_tsn_count (sctp_selective_ack_chunk_t * s,
+                                  u16 duplicate_tsn_count)
+{
+  vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK);
+  s->duplicate_tsn_count = clib_host_to_net_u16 (duplicate_tsn_count);
+}
+
+always_inline u16
+vnet_sctp_get_duplicate_tsn_count (sctp_selective_ack_chunk_t * s)
+{
+  return clib_net_to_host_u16 (s->duplicate_tsn_count);
+}
+
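+/*
+ * Illustrative sketch of acknowledging in-order data with no gaps and no
+ * duplicates ("sack" and "last_in_order_tsn" are placeholders; the
+ * gap-ack-block setter above also allocates the start/end arrays when a
+ * non-zero count is supplied):
+ *
+ *   vnet_sctp_set_cumulative_tsn_ack (sack, last_in_order_tsn);
+ *   vnet_sctp_set_arwnd (sack, DEFAULT_A_RWND);
+ *   vnet_sctp_set_duplicate_tsn_count (sack, 0);
+ */
+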
+/*
+ * Heartbeat Info
+ *
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |    Heartbeat Info Type=1      |         HB Info Length        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /                  Sender-Specific Heartbeat Info               /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * The Sender-Specific Heartbeat Info field should normally include
+   * information about the sender's current time when this HEARTBEAT
+   * chunk is sent and the destination transport address to which this
+   * HEARTBEAT is sent.
+   * This information is simply reflected back by the receiver in the
+   * HEARTBEAT ACK message.
+   *
+   * Note also that the HEARTBEAT message is both for reachability
+   * checking and for path verification.
+   * When a HEARTBEAT chunk is being used for path verification purposes,
+   * it MUST hold a 64-bit random nonce.
+   */
+  u64 hb_info;
+
+} sctp_hb_info_param_t;
+
+always_inline void
+vnet_sctp_set_heartbeat_info (sctp_hb_info_param_t * h, u64 hb_info,
+                             u16 hb_info_length)
+{
+  h->param_hdr.type = clib_host_to_net_u16 (1);        /* Heartbeat Info Type = 1 */
+  h->param_hdr.length = clib_host_to_net_u16 (hb_info_length);
+  h->hb_info = clib_host_to_net_u64 (hb_info);
+}
+
+/*
+ * Heartbeat Request
+ *
+ * An endpoint should send this chunk to its peer endpoint to probe the
+ * reachability of a particular destination transport address defined in
+ * the present association.
+ * The parameter field contains the Heartbeat Information, which is a
+ * variable-length opaque data structure understood only by the sender.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 4    | Chunk  Flags  |      Heartbeat Length         |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /            Heartbeat Information TLV (Variable-Length)        /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+  sctp_hb_info_param_t hb_info;
+
+} sctp_hb_req_chunk_t;
+
+always_inline void
+vnet_sctp_set_hb_request_info (sctp_hb_req_chunk_t * h,
+                              sctp_hb_info_param_t * hb_info)
+{
+  vnet_sctp_set_chunk_type (&h->chunk_hdr, HEARTBEAT);
+  memcpy (&h->hb_info, hb_info, sizeof (h->hb_info));
+}
+
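+/*
+ * Illustrative sketch of building a HEARTBEAT request ("hb_req" and "nonce"
+ * are placeholders; the same info is echoed back in the HEARTBEAT ACK):
+ *
+ *   sctp_hb_info_param_t info;
+ *   vnet_sctp_set_heartbeat_info (&info, nonce, sizeof (info));
+ *   vnet_sctp_set_hb_request_info (hb_req, &info);
+ */
+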
+/*
+ * Heartbeat Acknowledgement
+ *
+ * An endpoint should send this chunk to its peer endpoint as a response
+ * to a HEARTBEAT chunk.
+ * A HEARTBEAT ACK is always sent to the source IP address of the IP datagram
+ * containing the HEARTBEAT chunk to which this ack is responding.
+ */
+/*
+ *
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 5    | Chunk  Flags  |    Heartbeat Ack Length       |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /            Heartbeat Information TLV (Variable-Length)        /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef sctp_hb_req_chunk_t sctp_hb_ack_chunk_t;
+
+always_inline void
+vnet_sctp_set_hb_ack_info (sctp_hb_ack_chunk_t * h,
+                          sctp_hb_info_param_t * hb_info)
+{
+  vnet_sctp_set_chunk_type (&h->chunk_hdr, HEARTBEAT_ACK);
+  memcpy (&h->hb_info, hb_info, sizeof (h->hb_info));
+}
+
+/*
+ * Error cause
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           Cause Code          |       Cause Length            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /                    Cause-Specific Information                 /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+*/
+typedef struct
+{
+
+  sctp_opt_params_hdr_t param_hdr;
+  u64 cause_info;
+
+} sctp_err_cause_param_t;
+
+/*
+ * Abort Association (ABORT)
+ *
+ * The ABORT chunk is sent to the peer of an association to close the
+ * association.  The ABORT chunk may contain Cause Parameters to inform
+ * the receiver about the reason of the abort.  DATA chunks MUST NOT be
+ * bundled with ABORT.  Control chunks (except for INIT, INIT ACK, and
+ * SHUTDOWN COMPLETE) MAY be bundled with an ABORT, but they MUST be
+ * placed before the ABORT in the SCTP packet or they will be ignored by
+ * the receiver.
+ *
+ * If an endpoint receives an ABORT with a format error or no TCB is
+ * found, it MUST silently discard it.  Moreover, under any
+ * circumstances, an endpoint that receives an ABORT MUST NOT respond to
+ * that ABORT by sending an ABORT of its own.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 6    |Reserved     |T|           Length              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * \                                                               \
+ * /                   zero or more Error Causes                   /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+  sctp_err_cause_param_t err_causes[];
+
+} sctp_abort_chunk_t;
+
+always_inline void
+vnet_sctp_set_tbit (sctp_abort_chunk_t * a)
+{
+  vnet_sctp_set_chunk_type (&a->chunk_hdr, ABORT);
+  // a->chunk_hdr.flags = clib_host_to_net_u16 (1);
+}
+
+always_inline void
+vnet_sctp_unset_tbit (sctp_abort_chunk_t * a)
+{
+  vnet_sctp_set_chunk_type (&a->chunk_hdr, ABORT);
+  // a->chunk_hdr.flags = clib_host_to_net_u16 (0);
+}
+
+/*
+ * Shutdown Association (SHUTDOWN)
+ *
+ * An endpoint in an association MUST use this chunk to initiate a
+ * graceful close of the association with its peer.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 7    | Chunk  Flags  |      Length = 8               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      Cumulative TSN Ack                       |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+  /*
+   * This parameter contains the TSN of the last chunk received in
+   * sequence before any gaps.
+   *
+   * Note: Since the SHUTDOWN message does not contain Gap Ack Blocks,
+   * it cannot be used to acknowledge TSNs received out of order. In a
+   * SACK, lack of Gap Ack Blocks that were previously included
+   * indicates that the data receiver reneged on the associated DATA
+   * chunks. Since SHUTDOWN does not contain Gap Ack Blocks, the
+   * receiver of the SHUTDOWN shouldn't interpret the lack of a Gap Ack
+   * Block as a renege.
+   */
+  u32 cumulative_tsn_ack;
+
+} sctp_shutdown_association_chunk_t;
+
+always_inline void
+vnet_sctp_set_tsn_last_received_chunk (sctp_shutdown_association_chunk_t * s,
+                                      u32 tsn_last_chunk)
+{
+  vnet_sctp_set_chunk_type (&s->chunk_hdr, SHUTDOWN);
+  s->cumulative_tsn_ack = clib_host_to_net_u32 (tsn_last_chunk);
+}
+
+/*
+ * Shutdown Acknowledgement (SHUTDOWN ACK)
+ *
+ * This chunk MUST be used to acknowledge the receipt of the SHUTDOWN
+ * chunk at the completion of the shutdown process.
+ */
+/*
+ * 0                   1                   2                   3
+ * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |   Type = 8    |Chunk  Flags   |      Length = 4               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_header_t sctp_hdr;
+  sctp_chunks_common_hdr_t chunk_hdr;
+} sctp_shutdown_ack_chunk_t;
+
+always_inline void
+vnet_sctp_fill_shutdown_ack (sctp_shutdown_ack_chunk_t * s)
+{
+  vnet_sctp_set_chunk_type (&s->chunk_hdr, SHUTDOWN_ACK);
+  vnet_sctp_set_chunk_length (&s->chunk_hdr, 4);
+}
+
+#endif /* included_vnet_sctp_packet_h */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp_pg.c b/src/vnet/sctp/sctp_pg.c
new file mode 100644 (file)
index 0000000..d253330
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <vnet/ip/ip.h>
+#include <vnet/pg/pg.h>
+
+uword
+unformat_pg_sctp_header (unformat_input_t * input, va_list * args)
+{
+  return 1;
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sctp/sctp_timer.h b/src/vnet/sctp/sctp_timer.h
new file mode 100644 (file)
index 0000000..259dea9
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_sctp_timer_h__
+#define included_sctp_timer_h__
+
+#include <vppinfra/tw_timer_16t_2w_512sl.h>
+#include <vppinfra/tw_timer_16t_1w_2048sl.h>
+
+#endif /* included_sctp_timer_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index 8dab3d6..ec31789 100644 (file)
@@ -326,7 +326,22 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args)
       sep->is_ip4 = 0;
       return 1;
     }
-
+  if (unformat (input, "sctp://%U/%d", unformat_ip4_address, &sep->ip.ip4,
+               &sep->port))
+    {
+      sep->transport_proto = TRANSPORT_PROTO_SCTP;
+      sep->port = clib_host_to_net_u16 (sep->port);
+      sep->is_ip4 = 1;
+      return 1;
+    }
+  if (unformat (input, "sctp://%U/%d", unformat_ip6_address, &sep->ip.ip6,
+               &sep->port))
+    {
+      sep->transport_proto = TRANSPORT_PROTO_SCTP;
+      sep->port = clib_host_to_net_u16 (sep->port);
+      sep->is_ip4 = 0;
+      return 1;
+    }
   return 0;
 }
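Illustrative sketch only (not part of the committed patch): the two branches
added above make a URI of the form sctp://<ip>/<port> resolve to an SCTP
endpoint. The helper below is hypothetical and assumes the endpoint argument
is the same session_endpoint_t used elsewhere in this file.

/* Sketch: parse an SCTP URI string into an endpoint. */
static int
sctp_uri_parse_sketch (session_endpoint_t * sep)
{
  unformat_input_t input;
  char *uri = "sctp://10.0.0.1/1234";

  memset (sep, 0, sizeof (*sep));
  unformat_init_string (&input, uri, strlen (uri));
  if (!unformat_user (&input, unformat_vnet_uri, sep))
    {
      unformat_free (&input);
      return -1;
    }
  unformat_free (&input);
  /* On success: sep->transport_proto == TRANSPORT_PROTO_SCTP,
   * sep->is_ip4 == 1 and sep->port holds 1234 in network byte order. */
  return 0;
}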
 
index 2f01ac6..964e739 100644 (file)
@@ -54,6 +54,9 @@ format_transport_proto (u8 * s, va_list * args)
     case TRANSPORT_PROTO_UDP:
       s = format (s, "UDP");
       break;
+    case TRANSPORT_PROTO_SCTP:
+      s = format (s, "SCTP");
+      break;
     }
   return s;
 }
@@ -86,6 +89,10 @@ unformat_transport_proto (unformat_input_t * input, va_list * args)
     *proto = TRANSPORT_PROTO_UDP;
   else if (unformat (input, "UDP"))
     *proto = TRANSPORT_PROTO_UDP;
+  if (unformat (input, "sctp"))
+    *proto = TRANSPORT_PROTO_SCTP;
+  else if (unformat (input, "SCTP"))
+    *proto = TRANSPORT_PROTO_SCTP;
   else
     return 0;
   return 1;
index 61a2b7b..12b6a05 100644 (file)
@@ -70,6 +70,7 @@ typedef enum _transport_proto
 {
   TRANSPORT_PROTO_TCP,
   TRANSPORT_PROTO_UDP,
+  TRANSPORT_PROTO_SCTP,
   TRANSPORT_N_PROTO
 } transport_proto_t;
 
@@ -110,7 +111,7 @@ transport_endpoint_fib_proto (transport_endpoint_t * tep)
 always_inline u8
 transport_is_stream (u8 proto)
 {
-  return (proto == TRANSPORT_PROTO_TCP);
+  return ((proto == TRANSPORT_PROTO_TCP) || (proto == TRANSPORT_PROTO_SCTP));
 }
 
 always_inline u8
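Illustrative note (not part of the committed patch): the one-line change to
transport_is_stream above classifies SCTP together with TCP for the session
layer, while UDP remains in the datagram category. The checks below use only
the enum value and helper from this hunk.

/* Sketch: SCTP now counts as a stream transport, UDP does not. */
ASSERT (transport_is_stream (TRANSPORT_PROTO_SCTP) == 1);
ASSERT (transport_is_stream (TRANSPORT_PROTO_UDP) == 0);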
diff --git a/test/test_sctp.py b/test/test_sctp.py
new file mode 100644 (file)
index 0000000..f25fb7f
--- /dev/null
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+import unittest
+
+from framework import VppTestCase, VppTestRunner
+from vpp_ip_route import VppIpTable, VppIpRoute, VppRoutePath
+
+
+class TestSCTP(VppTestCase):
+    """ SCTP Test Case """
+
+    @classmethod
+    def setUpClass(cls):
+        super(TestSCTP, cls).setUpClass()
+
+    def setUp(self):
+        super(TestSCTP, self).setUp()
+        self.vapi.session_enable_disable(is_enabled=1)
+        self.create_loopback_interfaces(range(2))
+
+        table_id = 0
+
+        for i in self.lo_interfaces:
+            i.admin_up()
+
+            if table_id != 0:
+                tbl = VppIpTable(self, table_id)
+                tbl.add_vpp_config()
+
+            i.set_table_ip4(table_id)
+            i.config_ip4()
+            table_id += 1
+
+        # Configure namespaces
+        self.vapi.app_namespace_add(namespace_id="0",
+                                    sw_if_index=self.loop0.sw_if_index)
+        self.vapi.app_namespace_add(namespace_id="1",
+                                    sw_if_index=self.loop1.sw_if_index)
+
+    def tearDown(self):
+        for i in self.lo_interfaces:
+            i.unconfig_ip4()
+            i.set_table_ip4(0)
+            i.admin_down()
+        self.vapi.session_enable_disable(is_enabled=0)
+        super(TestSCTP, self).tearDown()
+
+    def test_sctp_unittest(self):
+        """ SCTP Unit Tests """
+        error = self.vapi.cli("test sctp all")
+
+        if error:
+            self.logger.critical(error)
+        self.assertEqual(error.find("failed"), -1)
+
+    def test_sctp_transfer(self):
+        """ SCTP builtin client/server transfer """
+
+        # Add inter-table routes
+        ip_t01 = VppIpRoute(self, self.loop1.local_ip4, 32,
+                            [VppRoutePath("0.0.0.0",
+                                          0xffffffff,
+                                          nh_table_id=1)])
+        ip_t10 = VppIpRoute(self, self.loop0.local_ip4, 32,
+                            [VppRoutePath("0.0.0.0",
+                                          0xffffffff,
+                                          nh_table_id=0)], table_id=1)
+        ip_t01.add_vpp_config()
+        ip_t10.add_vpp_config()
+
+        # Start builtin server and client
+        uri = "sctp://" + self.loop0.local_ip4 + "/1234"
+        error = self.vapi.cli("test sctp server appns 0 fifo-size 4 uri " +
+                              uri)
+        if error:
+            self.logger.critical(error)
+
+        error = self.vapi.cli("test sctp client mbytes 10 appns 1" +
+                              " fifo-size 4" +
+                              " no-output test-bytes syn-timeout 20 " +
+                              " uri " + uri)
+        if error:
+            self.logger.critical(error)
+        self.assertEqual(error.find("failed"), -1)
+
+        # Delete inter-table routes
+        ip_t01.remove_vpp_config()
+        ip_t10.remove_vpp_config()
+
+if __name__ == '__main__':
+    unittest.main(testRunner=VppTestRunner)