udp: refactor udp code
[vpp.git] / src / vnet / tcp / tcp_input.c
index 0a36d06..73642df 100644 (file)
@@ -937,6 +937,7 @@ tcp_cc_recovery_exit (tcp_connection_t * tc)
   tc->rto_boff = 0;
   tcp_update_rto (tc);
   tc->snd_rxt_ts = 0;
+  tc->snd_nxt = tc->snd_una_max;
   tcp_recovery_off (tc);
   TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
 }
@@ -947,6 +948,7 @@ tcp_cc_fastrecovery_exit (tcp_connection_t * tc)
   tc->cc_algo->recovered (tc);
   tc->snd_rxt_bytes = 0;
   tc->rcv_dupacks = 0;
+  tc->snd_nxt = tc->snd_una_max;
   tcp_fastrecovery_off (tc);
   tcp_fastrecovery_1_smss_off (tc);
   TCP_EVT_DBG (TCP_EVT_CC_EVT, tc, 3);
@@ -1386,8 +1388,8 @@ tcp_session_enqueue_data (tcp_connection_t * tc, vlib_buffer_t * b,
       return TCP_ERROR_PURE_ACK;
     }
 
-  written = stream_session_enqueue_data (&tc->connection, b, 0,
-                                        1 /* queue event */ , 1);
+  written = session_enqueue_stream_connection (&tc->connection, b, 0,
+                                              1 /* queue event */ , 1);
 
   TCP_EVT_DBG (TCP_EVT_INPUT, tc, 0, data_len, written);
 
@@ -1448,9 +1450,10 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
     }
 
   /* Enqueue out-of-order data with relative offset */
-  rv = stream_session_enqueue_data (&tc->connection, b,
-                                   vnet_buffer (b)->tcp.seq_number -
-                                   tc->rcv_nxt, 0 /* queue event */ , 0);
+  rv = session_enqueue_stream_connection (&tc->connection, b,
+                                         vnet_buffer (b)->tcp.seq_number -
+                                         tc->rcv_nxt, 0 /* queue event */ ,
+                                         0);
 
   /* Nothing written */
   if (rv)
@@ -1467,7 +1470,7 @@ tcp_session_enqueue_ooo (tcp_connection_t * tc, vlib_buffer_t * b,
       ooo_segment_t *newest;
       u32 start, end;
 
-      s0 = stream_session_get (tc->c_s_index, tc->c_thread_index);
+      s0 = session_get (tc->c_s_index, tc->c_thread_index);
 
       /* Get the newest segment from the fifo */
       newest = svm_fifo_newest_ooo_segment (s0->server_rx_fifo);
@@ -1626,7 +1629,7 @@ format_tcp_rx_trace (u8 * s, va_list * args)
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
   tcp_rx_trace_t *t = va_arg (*args, tcp_rx_trace_t *);
-  uword indent = format_get_indent (s);
+  u32 indent = format_get_indent (s);
 
   s = format (s, "%U\n%U%U",
              format_tcp_header, &t->tcp_header, 128,
@@ -1667,15 +1670,16 @@ tcp_set_rx_trace_data (tcp_rx_trace_t * t0, tcp_connection_t * tc0,
 }
 
 always_inline void
-tcp_established_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u8 val)
+tcp_node_inc_counter (vlib_main_t * vm, u32 tcp4_node, u32 tcp6_node,
+                     u8 is_ip4, u8 evt, u8 val)
 {
   if (PREDICT_TRUE (!val))
     return;
 
   if (is_ip4)
-    vlib_node_increment_counter (vm, tcp4_established_node.index, evt, val);
+    vlib_node_increment_counter (vm, tcp4_node, evt, val);
   else
-    vlib_node_increment_counter (vm, tcp6_established_node.index, evt, val);
+    vlib_node_increment_counter (vm, tcp6_node, evt, val);
 }
 
 always_inline uword
@@ -1785,8 +1789,11 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  errors = session_manager_flush_enqueue_events (my_thread_index);
-  tcp_established_inc_counter (vm, is_ip4, TCP_ERROR_EVENT_FIFO_FULL, errors);
+  errors = session_manager_flush_enqueue_events (TRANSPORT_PROTO_TCP,
+                                                my_thread_index);
+  tcp_node_inc_counter (vm, is_ip4, tcp4_established_node.index,
+                       tcp6_established_node.index,
+                       TCP_ERROR_EVENT_FIFO_FULL, errors);
   tcp_flush_frame_to_output (vm, my_thread_index, is_ip4);
 
   return from_frame->n_vectors;
@@ -1857,7 +1864,9 @@ vlib_node_registration_t tcp6_syn_sent_node;
 static u8
 tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr)
 {
-  transport_connection_t *tmp;
+  transport_connection_t *tmp = 0;
+  u64 handle;
+
   if (!tc)
     return 1;
 
@@ -1867,15 +1876,16 @@ tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr)
 
   if (!is_valid)
     {
-      if ((tmp =
-          stream_session_half_open_lookup (&tc->c_lcl_ip, &tc->c_rmt_ip,
-                                           tc->c_lcl_port, tc->c_rmt_port,
-                                           tc->c_transport_proto)))
+      handle = session_lookup_half_open_handle (&tc->connection);
+      tmp = session_lookup_half_open_connection (handle & 0xFFFFFFFF,
+                                                tc->c_proto, tc->c_is_ip4);
+
+      if (tmp)
        {
          if (tmp->lcl_port == hdr->dst_port
              && tmp->rmt_port == hdr->src_port)
            {
-             clib_warning ("half-open is valid!");
+             TCP_DBG ("half-open is valid!");
            }
        }
     }
@@ -1886,7 +1896,8 @@ tcp_lookup_is_valid (tcp_connection_t * tc, tcp_header_t * hdr)
  * Lookup transport connection
  */
 static tcp_connection_t *
-tcp_lookup_connection (vlib_buffer_t * b, u8 thread_index, u8 is_ip4)
+tcp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index,
+                      u8 is_ip4)
 {
   tcp_header_t *tcp;
   transport_connection_t *tconn;
@@ -1896,12 +1907,13 @@ tcp_lookup_connection (vlib_buffer_t * b, u8 thread_index, u8 is_ip4)
       ip4_header_t *ip4;
       ip4 = vlib_buffer_get_current (b);
       tcp = ip4_next_header (ip4);
-      tconn = stream_session_lookup_transport_wt4 (&ip4->dst_address,
-                                                  &ip4->src_address,
-                                                  tcp->dst_port,
-                                                  tcp->src_port,
-                                                  SESSION_TYPE_IP4_TCP,
-                                                  thread_index);
+      tconn = session_lookup_connection_wt4 (fib_index,
+                                            &ip4->dst_address,
+                                            &ip4->src_address,
+                                            tcp->dst_port,
+                                            tcp->src_port,
+                                            TRANSPORT_PROTO_TCP,
+                                            thread_index);
       tc = tcp_get_connection_from_transport (tconn);
       ASSERT (tcp_lookup_is_valid (tc, tcp));
     }
@@ -1910,12 +1922,13 @@ tcp_lookup_connection (vlib_buffer_t * b, u8 thread_index, u8 is_ip4)
       ip6_header_t *ip6;
       ip6 = vlib_buffer_get_current (b);
       tcp = ip6_next_header (ip6);
-      tconn = stream_session_lookup_transport_wt6 (&ip6->dst_address,
-                                                  &ip6->src_address,
-                                                  tcp->dst_port,
-                                                  tcp->src_port,
-                                                  SESSION_TYPE_IP6_TCP,
-                                                  thread_index);
+      tconn = session_lookup_connection_wt6 (fib_index,
+                                            &ip6->dst_address,
+                                            &ip6->src_address,
+                                            tcp->dst_port,
+                                            tcp->src_port,
+                                            TRANSPORT_PROTO_TCP,
+                                            thread_index);
       tc = tcp_get_connection_from_transport (tconn);
       ASSERT (tcp_lookup_is_valid (tc, tcp));
     }
@@ -1973,7 +1986,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          if (PREDICT_FALSE (tc0->flags & TCP_CONN_HALF_OPEN_DONE))
            {
              /* Make sure the connection actually exists */
-             ASSERT (tcp_lookup_connection (b0, my_thread_index, is_ip4));
+             ASSERT (tcp_lookup_connection (tc0->c_fib_index, b0,
+                                            my_thread_index, is_ip4));
              goto drop;
            }
 
@@ -2107,7 +2121,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
              /* Notify app that we have connection. If session layer can't
               * allocate session send reset */
-             if (stream_session_connect_notify (&new_tc0->connection, 0))
+             if (session_stream_connect_notify (&new_tc0->connection, 0))
                {
                  clib_warning ("connect notify fail");
                  tcp_send_reset_w_pkt (new_tc0, b0, is_ip4);
@@ -2128,7 +2142,7 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              new_tc0->state = TCP_STATE_SYN_RCVD;
 
              /* Notify app that we have connection */
-             if (stream_session_connect_notify (&new_tc0->connection, 0))
+             if (session_stream_connect_notify (&new_tc0->connection, 0))
                {
                  tcp_connection_cleanup (new_tc0);
                  tcp_send_reset_w_pkt (tc0, b0, is_ip4);
@@ -2161,7 +2175,8 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
        drop:
 
          b0->error = error0 ? node->errors[error0] : 0;
-         if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED))
+         if (PREDICT_FALSE
+             ((b0->flags & VLIB_BUFFER_IS_TRACED) && tcp0 != 0))
            {
              t0 = vlib_add_trace (vm, node, b0, sizeof (*t0));
              clib_memcpy (&t0->tcp_header, tcp0, sizeof (t0->tcp_header));
@@ -2176,17 +2191,11 @@ tcp46_syn_sent_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  errors = session_manager_flush_enqueue_events (my_thread_index);
-  if (errors)
-    {
-      if (is_ip4)
-       vlib_node_increment_counter (vm, tcp4_established_node.index,
-                                    TCP_ERROR_EVENT_FIFO_FULL, errors);
-      else
-       vlib_node_increment_counter (vm, tcp6_established_node.index,
-                                    TCP_ERROR_EVENT_FIFO_FULL, errors);
-    }
-
+  errors = session_manager_flush_enqueue_events (TRANSPORT_PROTO_TCP,
+                                                my_thread_index);
+  tcp_node_inc_counter (vm, is_ip4, tcp4_syn_sent_node.index,
+                       tcp6_syn_sent_node.index,
+                       TCP_ERROR_EVENT_FIFO_FULL, errors);
   return from_frame->n_vectors;
 }
 
@@ -2248,6 +2257,9 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) =
 
 VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv);
 
+vlib_node_registration_t tcp4_rcv_process_node;
+vlib_node_registration_t tcp6_rcv_process_node;
+
 /**
  * Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED
  * as per RFC793 p. 64
@@ -2306,7 +2318,9 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          if (CLIB_DEBUG)
            {
              tcp_connection_t *tmp;
-             tmp = tcp_lookup_connection (b0, my_thread_index, is_ip4);
+             tmp =
+               tcp_lookup_connection (tc0->c_fib_index, b0, my_thread_index,
+                                      is_ip4);
              if (tmp->state != tc0->state)
                {
                  clib_warning ("state changed");
@@ -2349,7 +2363,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
               */
              if (!tcp_rcv_ack_is_acceptable (tc0, b0))
                {
-                 clib_warning ("connection not accepted");
+                 TCP_DBG ("connection not accepted");
                  tcp_send_reset_w_pkt (tc0, b0, is_ip4);
                  goto drop;
                }
@@ -2428,7 +2442,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
              tc0->state = TCP_STATE_TIME_WAIT;
              TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
-             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
              goto drop;
 
              break;
@@ -2438,11 +2452,14 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
               * delete the TCB, enter the CLOSED state, and return. */
 
              if (!tcp_rcv_ack_is_acceptable (tc0, b0))
-               goto drop;
+               {
+                 error0 = TCP_ERROR_ACK_INVALID;
+                 goto drop;
+               }
 
              tc0->snd_una = vnet_buffer (b0)->tcp.ack_number;
-             /* Apparently our FIN was lost */
-             if (is_fin0)
+             /* Apparently our ACK for the peer's FIN was lost */
+             if (is_fin0 && tc0->snd_una != tc0->snd_una_max)
                {
                  tcp_send_fin (tc0);
                  goto drop;
@@ -2450,13 +2467,13 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
              tc0->state = TCP_STATE_CLOSED;
              TCP_EVT_DBG (TCP_EVT_STATE_CHANGE, tc0);
+             tcp_connection_timers_reset (tc0);
 
              /* Don't delete the connection/session yet. Instead, wait a
               * reasonable amount of time until the pipes are cleared. In
               * particular, this makes sure that we won't have dead sessions
               * when processing events on the tx path */
-             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
-             tcp_retransmit_timer_reset (tc0);
+             tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME);
 
              goto drop;
 
@@ -2470,7 +2487,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                goto drop;
 
              tcp_make_ack (tc0, b0);
-             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME);
+             next0 = tcp_next_output (is_ip4);
+             tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_TIMEWAIT_TIME);
 
              goto drop;
 
@@ -2566,16 +2584,11 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       vlib_put_next_frame (vm, node, next_index, n_left_to_next);
     }
 
-  errors = session_manager_flush_enqueue_events (my_thread_index);
-  if (errors)
-    {
-      if (is_ip4)
-       vlib_node_increment_counter (vm, tcp4_established_node.index,
-                                    TCP_ERROR_EVENT_FIFO_FULL, errors);
-      else
-       vlib_node_increment_counter (vm, tcp6_established_node.index,
-                                    TCP_ERROR_EVENT_FIFO_FULL, errors);
-    }
+  errors = session_manager_flush_enqueue_events (TRANSPORT_PROTO_TCP,
+                                                my_thread_index);
+  tcp_node_inc_counter (vm, is_ip4, tcp4_rcv_process_node.index,
+                       tcp6_rcv_process_node.index,
+                       TCP_ERROR_EVENT_FIFO_FULL, errors);
 
   return from_frame->n_vectors;
 }
@@ -2650,7 +2663,6 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 {
   u32 n_left_from, next_index, *from, *to_next;
   u32 my_thread_index = vm->thread_index;
-  u8 sst = is_ip4 ? SESSION_TYPE_IP4_TCP : SESSION_TYPE_IP6_TCP;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -2712,7 +2724,9 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
          /* 3. check for a SYN (did that already) */
 
          /* Make sure connection wasn't just created */
-         child0 = tcp_lookup_connection (b0, my_thread_index, is_ip4);
+         child0 =
+           tcp_lookup_connection (lc0->c_fib_index, b0, my_thread_index,
+                                  is_ip4);
          if (PREDICT_FALSE (child0->state != TCP_STATE_LISTEN))
            {
              error0 = TCP_ERROR_CREATE_EXISTS;
@@ -2739,7 +2753,7 @@ tcp46_listen_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                           sizeof (ip6_address_t));
            }
 
-         if (stream_session_accept (&child0->connection, lc0->c_s_index, sst,
+         if (stream_session_accept (&child0->connection, lc0->c_s_index,
                                     0 /* notify */ ))
            {
              clib_warning ("session accept fail");
@@ -2917,7 +2931,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
       while (n_left_from > 0 && n_left_to_next > 0)
        {
          int n_advance_bytes0, n_data_bytes0;
-         u32 bi0;
+         u32 bi0, fib_index0;
          vlib_buffer_t *b0;
          tcp_header_t *tcp0 = 0;
          tcp_connection_t *tc0;
@@ -2936,6 +2950,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
 
          b0 = vlib_get_buffer (vm, bi0);
          vnet_buffer (b0)->tcp.flags = 0;
+         fib_index0 = vnet_buffer (b0)->ip.fib_index;
 
          /* Checksum computed by ipx_local no need to compute again */
 
@@ -2947,12 +2962,13 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                                  + tcp_header_bytes (tcp0));
              n_data_bytes0 = clib_net_to_host_u16 (ip40->length)
                - n_advance_bytes0;
-             tconn = stream_session_lookup_transport_wt4 (&ip40->dst_address,
-                                                          &ip40->src_address,
-                                                          tcp0->dst_port,
-                                                          tcp0->src_port,
-                                                          SESSION_TYPE_IP4_TCP,
-                                                          my_thread_index);
+             tconn = session_lookup_connection_wt4 (fib_index0,
+                                                    &ip40->dst_address,
+                                                    &ip40->src_address,
+                                                    tcp0->dst_port,
+                                                    tcp0->src_port,
+                                                    TRANSPORT_PROTO_TCP,
+                                                    my_thread_index);
              tc0 = tcp_get_connection_from_transport (tconn);
              ASSERT (tcp_lookup_is_valid (tc0, tcp0));
            }
@@ -2964,12 +2980,13 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              n_data_bytes0 = clib_net_to_host_u16 (ip60->payload_length)
                - n_advance_bytes0;
              n_advance_bytes0 += sizeof (ip60[0]);
-             tconn = stream_session_lookup_transport_wt6 (&ip60->dst_address,
-                                                          &ip60->src_address,
-                                                          tcp0->dst_port,
-                                                          tcp0->src_port,
-                                                          SESSION_TYPE_IP6_TCP,
-                                                          my_thread_index);
+             tconn = session_lookup_connection_wt6 (fib_index0,
+                                                    &ip60->dst_address,
+                                                    &ip60->src_address,
+                                                    tcp0->dst_port,
+                                                    tcp0->src_port,
+                                                    TRANSPORT_PROTO_TCP,
+                                                    my_thread_index);
              tc0 = tcp_get_connection_from_transport (tconn);
              ASSERT (tcp_lookup_is_valid (tc0, tcp0));
            }