SCTP: congestion control
[vpp.git] / src / vnet / sctp / sctp_input.c
index 8f4b043..35218d5 100644 (file)
@@ -399,7 +399,8 @@ sctp_handle_init (sctp_header_t * sctp_hdr,
     }
 
   /* Reuse buffer to make init-ack and send */
-  sctp_prepare_initack_chunk (sctp_conn, b0, ip4_addr, ip6_addr);
+  sctp_prepare_initack_chunk (sctp_conn, MAIN_SCTP_SUB_CONN_IDX, b0, ip4_addr,
+                             ip6_addr);
   return SCTP_ERROR_NONE;
 }
 
@@ -530,12 +531,14 @@ sctp_handle_init_ack (sctp_header_t * sctp_hdr,
        }
     }
 
-  sctp_prepare_cookie_echo_chunk (sctp_conn, b0, &state_cookie);
+  sctp_prepare_cookie_echo_chunk (sctp_conn, idx, b0, &state_cookie);
 
   /* Start the T1_COOKIE timer */
-  sctp_timer_set (sctp_conn, sctp_pick_conn_idx_on_chunk (COOKIE_ECHO),
+  sctp_timer_set (sctp_conn, idx,
                  SCTP_TIMER_T1_COOKIE, sctp_conn->sub_conn[idx].RTO);
 
+  stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection);
+
   return SCTP_ERROR_NONE;
 }
 
@@ -648,7 +651,7 @@ sctp_session_enqueue_data (sctp_connection_t * sctp_conn, vlib_buffer_t * b,
 }
 
 always_inline u8
-sctp_is_sack_delayable (sctp_connection_t * sctp_conn, u8 is_gapping)
+sctp_is_sack_delayable (sctp_connection_t * sctp_conn, u8 idx, u8 is_gapping)
 {
   if (is_gapping != 0)
     {
@@ -679,7 +682,7 @@ sctp_is_connection_gapping (sctp_connection_t * sctp_conn, u32 tsn,
     {
       SCTP_CONN_TRACKING_DBG
        ("GAPPING: CONN_INDEX = %u, sctp_conn->next_tsn_expected = %u, tsn = %u, diff = %u",
-        sctp_conn->sub_conn[idx].connection.c_index,
+        sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index,
         sctp_conn->next_tsn_expected, tsn,
         sctp_conn->next_tsn_expected - tsn);
 
@@ -723,7 +726,11 @@ sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk,
     {
       /* In order data, enqueue. Fifo figures out by itself if any out-of-order
        * segments can be enqueued after fifo tail offset changes. */
-      error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx);
+      if (PREDICT_FALSE (is_gapping == 1))
+       error =
+         sctp_session_enqueue_data_ooo (sctp_conn, b, n_data_bytes, idx);
+      else
+       error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx);
     }
   else if (bbit == 1 && ebit == 0)     /* First piece of a fragmented user message */
     {
@@ -751,8 +758,10 @@ sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk,
 
   SCTP_ADV_DBG ("POINTER_WITH_DATA = %p", b->data);
 
-  if (!sctp_is_sack_delayable (sctp_conn, is_gapping))
-    sctp_prepare_sack_chunk (sctp_conn, b);
+  if (!sctp_is_sack_delayable (sctp_conn, idx, is_gapping))
+    sctp_prepare_sack_chunk (sctp_conn, idx, b);
+
+  sctp_conn->sub_conn[idx].enqueue_state = error;
 
   return error;
 }
@@ -787,17 +796,16 @@ sctp_handle_cookie_echo (sctp_header_t * sctp_hdr,
       return SCTP_ERROR_COOKIE_ECHO_VIOLATION;
     }
 
-  sctp_prepare_cookie_ack_chunk (sctp_conn, b0);
+  sctp_prepare_cookie_ack_chunk (sctp_conn, idx, b0);
 
   /* Change state */
   sctp_conn->state = SCTP_STATE_ESTABLISHED;
+  sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_STATE_UP;
   *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4);
 
   sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT,
                  sctp_conn->sub_conn[idx].RTO);
 
-  stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection);
-
   return SCTP_ERROR_NONE;
 
 }
@@ -819,13 +827,13 @@ sctp_handle_cookie_ack (sctp_header_t * sctp_hdr,
   sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T1_COOKIE);
   /* Change state */
   sctp_conn->state = SCTP_STATE_ESTABLISHED;
+  sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_STATE_UP;
+
   *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4);
 
   sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT,
                  sctp_conn->sub_conn[idx].RTO);
 
-  stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection);
-
   return SCTP_ERROR_NONE;
 
 }
@@ -860,7 +868,7 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          ip6_header_t *ip6_hdr = 0;
          sctp_connection_t *sctp_conn, *new_sctp_conn;
          u16 sctp_implied_length = 0;
-         u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT;
+         u16 error0 = SCTP_ERROR_NONE, next0 = sctp_next_drop (is_ip4);
          u8 idx;
 
          bi0 = from[0];
@@ -890,36 +898,18 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            {
              ip4_hdr = vlib_buffer_get_current (b0);
              sctp_hdr = ip4_next_header (ip4_hdr);
+             idx = sctp_sub_conn_id_via_ip4h (sctp_conn, ip4_hdr);
            }
          else
            {
              ip6_hdr = vlib_buffer_get_current (b0);
              sctp_hdr = ip6_next_header (ip6_hdr);
+             idx = sctp_sub_conn_id_via_ip6h (sctp_conn, ip6_hdr);
            }
-         idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
 
+         sctp_conn->sub_conn[idx].parent = sctp_conn;
          sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
 
-         transport_connection_t *trans_conn =
-           &sctp_conn->sub_conn[idx].connection;
-
-         trans_conn->lcl_port = sctp_hdr->dst_port;
-         trans_conn->rmt_port = sctp_hdr->src_port;
-         trans_conn->is_ip4 = is_ip4;
-
-         if (is_ip4)
-           {
-             trans_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32;
-             trans_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32;
-           }
-         else
-           {
-             clib_memcpy (&trans_conn->lcl_ip.ip6, &ip6_hdr->dst_address,
-                          sizeof (ip6_address_t));
-             clib_memcpy (&trans_conn->rmt_ip.ip6, &ip6_hdr->src_address,
-                          sizeof (ip6_address_t));
-           }
-
          sctp_chunk_hdr =
            (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr);
 
@@ -944,6 +934,8 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                    new_sctp_conn - tm->connections[my_thread_index];
                  new_sctp_conn->sub_conn[idx].c_thread_index =
                    my_thread_index;
+                 new_sctp_conn->sub_conn[idx].PMTU =
+                   sctp_conn->sub_conn[idx].PMTU;
                  new_sctp_conn->sub_conn[idx].parent = new_sctp_conn;
 
                  if (sctp_half_open_connection_cleanup (sctp_conn))
@@ -959,7 +951,7 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                                          new_sctp_conn, idx, b0,
                                          sctp_implied_length);
 
-                 sctp_init_mss (new_sctp_conn);
+                 sctp_init_cwnd (new_sctp_conn);
 
                  if (session_stream_connect_notify
                      (&new_sctp_conn->sub_conn[idx].connection, 0))
@@ -970,8 +962,8 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                      sctp_connection_cleanup (new_sctp_conn);
                      goto drop;
                    }
+                 next0 = sctp_next_output (is_ip4);
                }
-             next0 = sctp_next_output (is_ip4);
              break;
 
              /* All UNEXPECTED scenarios (wrong chunk received per state-machine)
@@ -1113,11 +1105,11 @@ sctp_handle_shutdown (sctp_header_t * sctp_hdr,
     case SCTP_STATE_ESTABLISHED:
       if (sctp_check_outstanding_data_chunks (sctp_conn) == 0)
        sctp_conn->state = SCTP_STATE_SHUTDOWN_RECEIVED;
-      sctp_send_shutdown_ack (sctp_conn, b0);
+      sctp_send_shutdown_ack (sctp_conn, idx, b0);
       break;
 
     case SCTP_STATE_SHUTDOWN_SENT:
-      sctp_send_shutdown_ack (sctp_conn, b0);
+      sctp_send_shutdown_ack (sctp_conn, idx, b0);
       break;
     }
 
@@ -1156,7 +1148,7 @@ sctp_handle_shutdown_ack (sctp_header_t * sctp_hdr,
   sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX,
                    SCTP_TIMER_T2_SHUTDOWN);
 
-  sctp_send_shutdown_complete (sctp_conn, b0);
+  sctp_send_shutdown_complete (sctp_conn, idx, b0);
 
   *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4);
 
@@ -1185,13 +1177,11 @@ sctp_handle_shutdown_complete (sctp_header_t * sctp_hdr,
   if (sctp_is_bundling (sctp_implied_length, &shutdown_complete->chunk_hdr))
     return SCTP_ERROR_BUNDLING_VIOLATION;
 
-  sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX,
-                   SCTP_TIMER_T2_SHUTDOWN);
+  sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN);
 
-  sctp_conn->state = SCTP_STATE_CLOSED;
+  stream_session_disconnect_notify (&sctp_conn->sub_conn[idx].connection);
 
-  stream_session_disconnect_notify (&sctp_conn->sub_conn
-                                   [MAIN_SCTP_SUB_CONN_IDX].connection);
+  sctp_conn->state = SCTP_STATE_CLOSED;
 
   *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4);
 
@@ -1229,6 +1219,7 @@ sctp46_shutdown_phase_inline (vlib_main_t * vm,
          sctp_connection_t *sctp_conn;
          u16 sctp_implied_length = 0;
          u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT;
+         u8 idx = 0;
 
          bi0 = from[0];
          to_next[0] = bi0;
@@ -1254,11 +1245,13 @@ sctp46_shutdown_phase_inline (vlib_main_t * vm,
            {
              ip4_hdr = vlib_buffer_get_current (b0);
              sctp_hdr = ip4_next_header (ip4_hdr);
+             idx = sctp_sub_conn_id_via_ip4h (sctp_conn, ip4_hdr);
            }
          else
            {
              ip6_hdr = vlib_buffer_get_current (b0);
              sctp_hdr = ip6_next_header (ip6_hdr);
+             idx = sctp_sub_conn_id_via_ip6h (sctp_conn, ip6_hdr);
            }
 
          sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
@@ -1267,8 +1260,6 @@ sctp46_shutdown_phase_inline (vlib_main_t * vm,
          sctp_implied_length =
            sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4);
 
-         u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
-
          u8 chunk_type = vnet_sctp_get_chunk_type (sctp_chunk_hdr);
          switch (chunk_type)
            {
@@ -1427,6 +1418,19 @@ sctp_handle_sack (sctp_selective_ack_chunk_t * sack_chunk,
       return SCTP_ERROR_INVALID_TAG;
     }
 
+  sctp_conn->sub_conn[idx].last_seen = sctp_time_now ();
+
+  /* RFC 4960, Section 7.2.2, point (2): applies only while cwnd > ssthresh */
+  if (sctp_conn->sub_conn[idx].cwnd > sctp_conn->sub_conn[idx].ssthresh)
+    sctp_conn->sub_conn[idx].partially_acked_bytes =
+      sctp_conn->next_tsn - sack_chunk->cumulative_tsn_ack;
+
+  /* RFC 4960, Section 7.2.2, point (5): reset when next_tsn equals the cumulative TSN ack */
+  if (sctp_conn->next_tsn - sack_chunk->cumulative_tsn_ack == 0)
+    sctp_conn->sub_conn[idx].partially_acked_bytes = 0;
+
+  sctp_conn->last_unacked_tsn = sack_chunk->cumulative_tsn_ack;
+
   sctp_calculate_rto (sctp_conn, idx);
 
   sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX,
@@ -1450,7 +1454,7 @@ sctp_handle_heartbeat (sctp_hb_req_chunk_t * sctp_hb_chunk,
       return SCTP_ERROR_INVALID_TAG;
     }
 
-  sctp_prepare_heartbeat_ack_chunk (sctp_conn, b0);
+  sctp_prepare_heartbeat_ack_chunk (sctp_conn, idx, b0);
 
   *next0 = sctp_next_output (sctp_conn->sub_conn[idx].connection.is_ip4);
 
@@ -1462,6 +1466,8 @@ sctp_handle_heartbeat_ack (sctp_hb_ack_chunk_t * sctp_hb_ack_chunk,
                           sctp_connection_t * sctp_conn, u8 idx,
                           vlib_buffer_t * b0, u16 * next0)
 {
+  sctp_conn->sub_conn[idx].last_seen = sctp_time_now ();
+
   sctp_conn->sub_conn[idx].unacknowledged_hb -= 1;
 
   sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT,
@@ -1513,7 +1519,7 @@ sctp46_listen_process_inline (vlib_main_t * vm,
          ip6_header_t *ip6_hdr;
          sctp_connection_t *child_conn;
          sctp_connection_t *sctp_listener;
-         u16 next0 = SCTP_LISTEN_PHASE_N_NEXT, error0 = SCTP_ERROR_ENQUEUED;
+         u16 next0 = sctp_next_drop (is_ip4), error0 = SCTP_ERROR_ENQUEUED;
 
          bi0 = from[0];
          to_next[0] = bi0;
@@ -1563,6 +1569,8 @@ sctp46_listen_process_inline (vlib_main_t * vm,
          child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_is_ip4 = is_ip4;
          child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto =
            sctp_listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto;
+         child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].PMTU =
+           sctp_listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].PMTU;
          child_conn->state = SCTP_STATE_CLOSED;
 
          if (is_ip4)
@@ -1612,7 +1620,7 @@ sctp46_listen_process_inline (vlib_main_t * vm,
                sctp_handle_init (sctp_hdr, sctp_chunk_hdr, child_conn, b0,
                                  sctp_implied_length);
 
-             sctp_init_mss (child_conn);
+             sctp_init_cwnd (child_conn);
 
              if (error0 == SCTP_ERROR_NONE)
                {
@@ -1627,8 +1635,8 @@ sctp46_listen_process_inline (vlib_main_t * vm,
                      error0 = SCTP_ERROR_CREATE_SESSION_FAIL;
                      goto drop;
                    }
+                 next0 = sctp_next_output (is_ip4);
                }
-             next0 = sctp_next_output (is_ip4);
              break;
 
              /* Reception of a DATA chunk whilst in the CLOSED state is called
@@ -1729,39 +1737,18 @@ sctp46_established_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
            {
              ip4_hdr = vlib_buffer_get_current (b0);
              sctp_hdr = ip4_next_header (ip4_hdr);
+             idx = sctp_sub_conn_id_via_ip4h (sctp_conn, ip4_hdr);
            }
          else
            {
              ip6_hdr = vlib_buffer_get_current (b0);
              sctp_hdr = ip6_next_header (ip6_hdr);
+             idx = sctp_sub_conn_id_via_ip6h (sctp_conn, ip6_hdr);
            }
 
-         idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
-
-         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
-
-         transport_connection_t *trans_conn =
-           &sctp_conn->sub_conn[idx].connection;
-
-         trans_conn->lcl_port = sctp_hdr->dst_port;
-         trans_conn->rmt_port = sctp_hdr->src_port;
-         trans_conn->is_ip4 = is_ip4;
-
          sctp_conn->sub_conn[idx].parent = sctp_conn;
 
-         if (is_ip4)
-           {
-             trans_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32;
-             trans_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32;
-           }
-         else
-           {
-             clib_memcpy (&trans_conn->lcl_ip.ip6, &ip6_hdr->dst_address,
-                          sizeof (ip6_address_t));
-             clib_memcpy (&trans_conn->rmt_ip.ip6, &ip6_hdr->src_address,
-                          sizeof (ip6_address_t));
-           }
-
+         sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr;
          sctp_chunk_hdr =
            (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr);
 
@@ -2082,9 +2069,6 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node,
              goto done;
            }
 
-#if SCTP_DEBUG_STATE_MACHINE
-         u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state);
-#endif
          vnet_buffer (b0)->sctp.hdr_offset =
            (u8 *) sctp_hdr - (u8 *) vlib_buffer_get_current (b0);
 
@@ -2099,15 +2083,12 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node,
              next0 = tm->dispatch_table[sctp_conn->state][chunk_type].next;
              error0 = tm->dispatch_table[sctp_conn->state][chunk_type].error;
 
-             SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u: "
-                                     "CURRENT_CONNECTION_STATE = %s,"
-                                     "CHUNK_TYPE_RECEIVED = %s "
-                                     "NEXT_PHASE = %s",
-                                     sctp_conn->sub_conn
-                                     [idx].connection.c_index,
-                                     sctp_state_to_string (sctp_conn->state),
-                                     sctp_chunk_to_string (type),
-                                     phase_to_string (next0));
+             SCTP_DBG_STATE_MACHINE
+               ("SESSION_INDEX = %u, CURRENT_CONNECTION_STATE = %s,"
+                "CHUNK_TYPE_RECEIVED = %s " "NEXT_PHASE = %s",
+                sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].
+                connection.s_index, sctp_state_to_string (sctp_conn->state),
+                sctp_chunk_to_string (chunk_type), phase_to_string (next0));
 
              if (chunk_type == DATA)
                SCTP_ADV_DBG ("n_advance_bytes0 = %u, n_data_bytes0 = %u",
@@ -2247,7 +2228,7 @@ do {                                                              \
    * _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED")   \
    * _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")
    */
-  _(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE);      /* UNEXPECTED DATA chunk which requires special handling */
+  //_(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE);     /* UNEXPECTED DATA chunk which requires special handling */
   _(CLOSED, INIT, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE);
   _(CLOSED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP);       /* UNEXPECTED INIT_ACK chunk */
   _(CLOSED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION);      /* UNEXPECTED SACK chunk */