udp: track connection port sharing
[vpp.git] / src / vnet / udp / udp_input.c
index 8170cfb..4b22cbe 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
  * limitations under the License.
  */
 
+#include <vlibmemory/api.h>
 #include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/pg/pg.h>
-#include <vnet/ip/ip.h>
 
-#include <vnet/udp/udp.h>
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
 #include <vppinfra/elog.h>
 
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
 #include <vnet/udp/udp_packet.h>
-
-#include <vlibmemory/api.h>
-#include "../session/application_interface.h"
+#include <vnet/session/session.h>
 
 static char *udp_error_strings[] = {
 #define udp_error(n,s) s,
@@ -81,98 +80,119 @@ always_inline uword
 udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
                    vlib_frame_t * frame, u8 is_ip4)
 {
-  u32 n_left_from, *from, *to_next;
-  u32 next_index, errors;
+  u32 n_left_from, *from;
+  u32 errors, *first_buffer;
   u32 my_thread_index = vm->thread_index;
 
-  from = vlib_frame_vector_args (frame);
+  from = first_buffer = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
 
   while (n_left_from > 0)
     {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
-
-      while (n_left_from > 0 && n_left_to_next > 0)
+      u32 bi0, fib_index0, data_len;
+      vlib_buffer_t *b0;
+      u32 error0 = UDP_ERROR_ENQUEUED;
+      udp_header_t *udp0;
+      ip4_header_t *ip40;
+      ip6_header_t *ip60;
+      u8 *data0;
+      session_t *s0;
+      udp_connection_t *uc0, *child0, *new_uc0;
+      transport_connection_t *tc0;
+      int wrote0;
+      void *rmt_addr, *lcl_addr;
+      session_dgram_hdr_t hdr0;
+      u8 queue_event = 1;
+
+      /* speculatively enqueue b0 to the current next frame */
+      bi0 = from[0];
+      from += 1;
+      n_left_from -= 1;
+
+      b0 = vlib_get_buffer (vm, bi0);
+
+      /* udp_local hands us a pointer to the udp data */
+      data0 = vlib_buffer_get_current (b0);
+      udp0 = (udp_header_t *) (data0 - sizeof (*udp0));
+      fib_index0 = vnet_buffer (b0)->ip.fib_index;
+
+      if (is_ip4)
        {
-         u32 bi0, fib_index0;
-         vlib_buffer_t *b0;
-         u32 next0 = UDP_INPUT_NEXT_DROP;
-         u32 error0 = UDP_ERROR_ENQUEUED;
-         udp_header_t *udp0;
-         ip4_header_t *ip40;
-         ip6_header_t *ip60;
-         u8 *data0;
-         stream_session_t *s0;
-         transport_connection_t *tc0 = 0;
-         udp_connection_t *child0, *new_uc0;
-         int written0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-
-         /* udp_local hands us a pointer to the udp data */
-         data0 = vlib_buffer_get_current (b0);
-         udp0 = (udp_header_t *) (data0 - sizeof (*udp0));
-         fib_index0 = vnet_buffer (b0)->ip.fib_index;
-
-         if (is_ip4)
-           {
-             /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */
-             ip40 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip40));
-             s0 = session_lookup_safe4 (fib_index0, &ip40->dst_address,
-                                        &ip40->src_address, udp0->dst_port,
-                                        udp0->src_port, TRANSPORT_PROTO_UDP);
-           }
-         else
-           {
-             ip60 = (ip6_header_t *) (((u8 *) udp0) - sizeof (*ip60));
-             s0 = session_lookup_safe6 (fib_index0, &ip60->dst_address,
-                                        &ip60->src_address, udp0->dst_port,
-                                        udp0->src_port, TRANSPORT_PROTO_UDP);
-           }
+         /* TODO: must fix once udp_local does ip options correctly */
+         ip40 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip40));
+         s0 = session_lookup_safe4 (fib_index0, &ip40->dst_address,
+                                    &ip40->src_address, udp0->dst_port,
+                                    udp0->src_port, TRANSPORT_PROTO_UDP);
+         lcl_addr = &ip40->dst_address;
+         rmt_addr = &ip40->src_address;
+         data_len = clib_net_to_host_u16 (ip40->length);
+         data_len -= sizeof (ip4_header_t) + sizeof (udp_header_t);
+       }
+      else
+       {
+         ip60 = (ip6_header_t *) (((u8 *) udp0) - sizeof (*ip60));
+         s0 = session_lookup_safe6 (fib_index0, &ip60->dst_address,
+                                    &ip60->src_address, udp0->dst_port,
+                                    udp0->src_port, TRANSPORT_PROTO_UDP);
+         lcl_addr = &ip60->dst_address;
+         rmt_addr = &ip60->src_address;
+         data_len = clib_net_to_host_u16 (ip60->payload_length);
+         data_len -= sizeof (udp_header_t);
+       }
 
-         if (PREDICT_FALSE (s0 == 0))
-           {
-             error0 = UDP_ERROR_NO_LISTENER;
-             goto trace0;
-           }
+      if (PREDICT_FALSE (!s0))
+       {
+         error0 = UDP_ERROR_NO_LISTENER;
+         goto trace0;
+       }
 
-         if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY))
-           {
-             tc0 = session_get_transport (s0);
-           }
-         else if (s0->session_state == SESSION_STATE_CONNECTING_READY)
+      if (s0->session_state == SESSION_STATE_OPENED)
+       {
+         /* TODO optimization: move cl session to right thread
+          * However, since such a move would affect the session handle,
+          * which we pass 'raw' to the app, we'd also have notify the
+          * app of the change or change the way we pass handles to apps.
+          */
+         tc0 = session_get_transport (s0);
+         uc0 = udp_get_connection_from_transport (tc0);
+         if (uc0->flags & UDP_CONN_F_CONNECTED)
            {
-             /*
-              * Clone the transport. It will be cleaned up with the
-              * session once we notify the session layer.
-              */
-             new_uc0 = udp_conenction_clone_safe (s0->connection_index,
-                                                  s0->thread_index);
-             ASSERT (s0->session_index == new_uc0->c_s_index);
-
-             /*
-              * Drop the 'lock' on pool resize
-              */
-             session_pool_remove_peeker (s0->thread_index);
-             session_dgram_connect_notify (&new_uc0->connection,
-                                           s0->thread_index, &s0);
-             tc0 = &new_uc0->connection;
+             if (s0->thread_index != vlib_get_thread_index ())
+               {
+                 /*
+                  * Clone the transport. It will be cleaned up with the
+                  * session once we notify the session layer.
+                  */
+                 new_uc0 =
+                   udp_connection_clone_safe (s0->connection_index,
+                                              s0->thread_index);
+                 ASSERT (s0->session_index == new_uc0->c_s_index);
+
+                 /*
+                  * Drop the 'lock' on pool resize
+                  */
+                 session_pool_remove_peeker (s0->thread_index);
+                 session_dgram_connect_notify (&new_uc0->connection,
+                                               s0->thread_index, &s0);
+                 tc0 = &new_uc0->connection;
+                 uc0 = new_uc0;
+                 queue_event = 0;
+               }
+             else
+               s0->session_state = SESSION_STATE_READY;
            }
-         else if (s0->session_state == SESSION_STATE_LISTENING)
+       }
+      else if (s0->session_state == SESSION_STATE_READY)
+       {
+         tc0 = session_get_transport (s0);
+         uc0 = udp_get_connection_from_transport (tc0);
+       }
+      else if (s0->session_state == SESSION_STATE_LISTENING)
+       {
+         tc0 = listen_session_get_transport (s0);
+         uc0 = udp_get_connection_from_transport (tc0);
+         if (uc0->flags & UDP_CONN_F_CONNECTED)
            {
-             tc0 = listen_session_get_transport (s0);
-
              child0 = udp_connection_alloc (my_thread_index);
              if (is_ip4)
                {
@@ -187,10 +207,11 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              child0->c_lcl_port = udp0->dst_port;
              child0->c_rmt_port = udp0->src_port;
              child0->c_is_ip4 = is_ip4;
-             child0->mtu = 1460;       /* $$$$ policy */
+             child0->c_fib_index = tc0->fib_index;
+             child0->flags |= UDP_CONN_F_CONNECTED;
 
-             if (stream_session_accept
-                 (&child0->connection, tc0->s_index, 1))
+             if (session_stream_accept (&child0->connection,
+                                        tc0->s_index, tc0->thread_index, 1))
                {
                  error0 = UDP_ERROR_CREATE_SESSION;
                  goto trace0;
@@ -198,53 +219,79 @@ udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
              s0 = session_get (child0->c_s_index, child0->c_thread_index);
              s0->session_state = SESSION_STATE_READY;
              tc0 = &child0->connection;
-
+             uc0 = udp_get_connection_from_transport (tc0);
+             udp_connection_share_port (clib_net_to_host_u16
+                                        (uc0->c_lcl_port), uc0->c_is_ip4);
              error0 = UDP_ERROR_LISTENER;
            }
-         else
-           {
-             error0 = UDP_ERROR_NOT_READY;
-             goto trace0;
-           }
+       }
+      else
+       {
+         error0 = UDP_ERROR_NOT_READY;
+         goto trace0;
+       }
 
-         written0 = session_enqueue_dgram_connection (s0, b0, tc0->proto,
-                                                      1 /* queue evt */ );
-         if (PREDICT_FALSE (written0 < 0))
-           {
-             error0 = UDP_ERROR_FIFO_FULL;
-             goto trace0;
-           }
 
-       trace0:
-         b0->error = node->errors[error0];
+      if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
+         < data_len + sizeof (session_dgram_hdr_t))
+       {
+         error0 = UDP_ERROR_FIFO_FULL;
+         goto trace0;
+       }
+      hdr0.data_length = b0->current_length = data_len;
+      hdr0.data_offset = 0;
+      ip_set (&hdr0.lcl_ip, lcl_addr, is_ip4);
+      ip_set (&hdr0.rmt_ip, rmt_addr, is_ip4);
+      hdr0.lcl_port = udp0->dst_port;
+      hdr0.rmt_port = udp0->src_port;
+      hdr0.is_ip4 = is_ip4;
+
+      clib_spinlock_lock (&uc0->rx_lock);
+      /* If session is owned by another thread and rx event needed,
+       * enqueue event now while we still have the peeker lock */
+      if (s0->thread_index != my_thread_index)
+       {
+         wrote0 = session_enqueue_dgram_connection (s0, &hdr0, b0,
+                                                    TRANSPORT_PROTO_UDP,
+                                                    /* queue event */ 0);
+         if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
+           session_enqueue_notify (s0);
+       }
+      else
+       {
+         wrote0 = session_enqueue_dgram_connection (s0, &hdr0, b0,
+                                                    TRANSPORT_PROTO_UDP,
+                                                    queue_event);
+       }
+      clib_spinlock_unlock (&uc0->rx_lock);
+      ASSERT (wrote0 > 0);
 
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             udp_input_trace_t *t = vlib_add_trace (vm, node, b0,
-                                                    sizeof (*t));
+      if (s0->session_state != SESSION_STATE_LISTENING)
+       session_pool_remove_peeker (s0->thread_index);
 
-             t->connection = tc0 ? tc0->c_index : ~0;
-             t->disposition = error0;
-             t->thread_index = my_thread_index;
-           }
+    trace0:
 
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next,
-                                          bi0, next0);
-       }
+      b0->error = node->errors[error0];
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+      if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                        && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+       {
+         udp_input_trace_t *t = vlib_add_trace (vm, node, b0,
+                                                sizeof (*t));
+
+         t->connection = s0 ? s0->connection_index : ~0;
+         t->disposition = error0;
+         t->thread_index = my_thread_index;
+       }
     }
 
-  errors = session_manager_flush_enqueue_events (TRANSPORT_PROTO_UDP,
-                                                my_thread_index);
+  vlib_buffer_free (vm, first_buffer, frame->n_vectors);
+  errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
+                                             my_thread_index);
   udp_input_inc_counter (vm, is_ip4, UDP_ERROR_EVENT_FIFO_FULL, errors);
   return frame->n_vectors;
 }
 
-vlib_node_registration_t udp4_input_node;
-vlib_node_registration_t udp6_input_node;
 
 static uword
 udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,