udp: cleanup input node
[vpp.git] / src / vnet / udp / udp_input.c
index 9a8ff07..0a4af60 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
  * limitations under the License.
  */
 
+#include <vlibmemory/api.h>
 #include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/pg/pg.h>
-#include <vnet/ip/ip.h>
 
-#include <vnet/udp/udp.h>
 #include <vppinfra/hash.h>
 #include <vppinfra/error.h>
 #include <vppinfra/elog.h>
 
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
 #include <vnet/udp/udp_packet.h>
+#include <vnet/session/session.h>
 
-#include <vlibmemory/api.h>
-#include "../session/application_interface.h"
-
-vlib_node_registration_t udp4_uri_input_node;
+/* Error strings shared by the UDP input nodes: udp_error.def is expanded
+ * with udp_error(n,s) defined to yield only its string argument s. */
+static char *udp_error_strings[] = {
+#define udp_error(n,s) s,
+#include "udp_error.def"
+#undef udp_error
+};
 
+/* Per-packet trace record for the UDP input nodes: a connection index,
+ * a disposition (error) code and a thread index, all printed by
+ * format_udp_input_trace. */
 typedef struct
 {
-  u32 session;
+  u32 connection;
   u32 disposition;
   u32 thread_index;
-} udp4_uri_input_trace_t;
+} udp_input_trace_t;
 
 /* packet trace format function */
+/* Pulls vm, node (both unused) and a udp_input_trace_t * from args, appends
+ * the record's connection, disposition and thread_index to s and returns
+ * the resulting vector. */
 static u8 *
-format_udp4_uri_input_trace (u8 * s, va_list * args)
+format_udp_input_trace (u8 * s, va_list * args)
 {
   CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
   CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *);
+  udp_input_trace_t *t = va_arg (*args, udp_input_trace_t *);
 
-  s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d",
-             t->session, t->disposition, t->thread_index);
+  s = format (s, "UDP_INPUT: connection %d, disposition %d, thread %d",
+             t->connection, t->disposition, t->thread_index);
   return s;
 }
 
+/* Single source of truth for the input nodes' next nodes: each _ (SYM, name)
+ * entry becomes an UDP_INPUT_NEXT_SYM enumerator below and a
+ * [UDP_INPUT_NEXT_SYM] = name initializer in the node registrations. */
+#define foreach_udp_input_next                 \
+  _ (DROP, "error-drop")
+
 typedef enum
 {
-  UDP4_URI_INPUT_NEXT_DROP,
-  UDP4_URI_INPUT_N_NEXT,
-} udp4_uri_input_next_t;
-
-static char *udp4_uri_input_error_strings[] = {
-#define _(sym,string) string,
-  foreach_session_input_error
+#define _(s, n) UDP_INPUT_NEXT_##s,
+  foreach_udp_input_next
 #undef _
-};
+    UDP_INPUT_N_NEXT,
+} udp_input_next_t;
 
-static uword
-udp4_uri_input_node_fn (vlib_main_t * vm,
-                       vlib_node_runtime_t * node, vlib_frame_t * frame)
+/*
+ * Add val to counter evt of udp4_input_node (is_ip4 != 0) or
+ * udp6_input_node (is_ip4 == 0).
+ *
+ * val is u32 rather than u8: the caller in this file passes u16 per-frame
+ * counts, and a u8 parameter silently truncates anything above 255 (a
+ * count of 256 would be recorded as 0).
+ */
+always_inline void
+udp_input_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u32 val)
 {
-  u32 n_left_from, *from, *to_next;
-  udp4_uri_input_next_t next_index;
-  udp_uri_main_t *um = vnet_get_udp_main ();
-  session_manager_main_t *smm = vnet_get_session_manager_main ();
-  u32 my_thread_index = vm->thread_index;
-  u8 my_enqueue_epoch;
-  u32 *session_indices_to_enqueue;
-  static u32 serial_number;
-  int i;
-
-  my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index];
-
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
+  if (is_ip4)
+    vlib_node_increment_counter (vm, udp4_input_node.index, evt, val);
+  else
+    vlib_node_increment_counter (vm, udp6_input_node.index, evt, val);
+}
 
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
+/*
+ * Flush a per-frame error count array into the node counters: every
+ * non-zero cnts[i], for i in [0, UDP_N_ERROR), is handed to
+ * udp_input_inc_counter for the node selected by is_ip4.
+ *
+ * Wrapped in do { } while (0) so the expansion is exactly one statement
+ * and remains valid inside an unbraced if/else; arguments are
+ * parenthesized and the loop index is named _i so it cannot capture an
+ * "i" used in the caller's arguments.
+ */
+#define udp_store_err_counters(vm, is_ip4, cnts)                       \
+do {                                                                   \
+  int _i;                                                              \
+  for (_i = 0; _i < UDP_N_ERROR; _i++)                                 \
+    if ((cnts)[_i])                                                    \
+      udp_input_inc_counter ((vm), (is_ip4), _i, (cnts)[_i]);          \
+} while (0)
 
-      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+/*
+ * Add val to slot err of the count array cnts.
+ *
+ * Written as do { } while (0) with parenthesized arguments so it behaves
+ * as a single statement (safe before an "else") and tolerates expression
+ * arguments such as "base + 1".
+ */
+#define udp_inc_err_counter(cnts, err, val)                            \
+do {                                                                   \
+  (cnts)[(err)] += (val);                                              \
+} while (0)
 
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-         u32 bi0;
-         vlib_buffer_t *b0;
-         u32 next0 = UDP4_URI_INPUT_NEXT_DROP;
-         u32 error0 = SESSION_ERROR_ENQUEUED;
-         udp_header_t *udp0;
-         ip4_header_t *ip0;
-         stream_session_t *s0;
-         svm_fifo_t *f0;
-         u16 udp_len0;
-         u8 *data0;
-
-         /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
-
-         b0 = vlib_get_buffer (vm, bi0);
-
-         /* udp_local hands us a pointer to the udp data */
-
-         data0 = vlib_buffer_get_current (b0);
-         udp0 = (udp_header_t *) (data0 - sizeof (*udp0));
-
-         /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */
-         ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0));
-         s0 = 0;
-
-         /* lookup session */
-         s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address,
-                                      udp0->dst_port, udp0->src_port,
-                                      SESSION_TYPE_IP4_UDP);
-
-         /* no listener */
-         if (PREDICT_FALSE (s0 == 0))
-           {
-             error0 = SESSION_ERROR_NO_LISTENER;
-             goto trace0;
-           }
+/*
+ * Append a udp_input_trace_t record to buffer b if it is flagged
+ * VLIB_BUFFER_IS_TRACED; otherwise do nothing.
+ *
+ * s may be NULL (no session was found for the packet). In that case the
+ * connection is recorded as ~0 and the thread index falls back to the
+ * thread running the node, instead of dereferencing the NULL session.
+ */
+static void
+udp_trace_buffer (vlib_main_t * vm, vlib_node_runtime_t * node,
+                 vlib_buffer_t * b, session_t * s, u16 error0)
+{
+  udp_input_trace_t *t;
 
-         f0 = s0->server_rx_fifo;
+  if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_IS_TRACED)))
+    return;
 
-         /* established hit */
-         if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY))
-           {
-             udp_len0 = clib_net_to_host_u16 (udp0->length);
+  t = vlib_add_trace (vm, node, b, sizeof (*t));
+  t->connection = s ? s->connection_index : ~0;
+  t->disposition = error0;
+  t->thread_index = s ? s->thread_index : vm->thread_index;
+}
 
-             if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0)))
-               {
-                 error0 = SESSION_ERROR_FIFO_FULL;
-                 goto trace0;
-               }
+/*
+ * Allocate a connection on thread_index for a datagram received on
+ * listener. Addresses, ports and address family come from hdr; fib index
+ * and mss are copied from the listener and the connection is flagged
+ * UDP_CONN_F_CONNECTED.
+ *
+ * Returns the new connection, or 0 after freeing it when
+ * session_dgram_accept reports failure.
+ */
+static udp_connection_t *
+udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
+                      u32 thread_index)
+{
+  udp_connection_t *conn = udp_connection_alloc (thread_index);
+
+  /* Endpoint identity is taken from the received datagram header */
+  ip_copy (&conn->c_lcl_ip, &hdr->lcl_ip, hdr->is_ip4);
+  ip_copy (&conn->c_rmt_ip, &hdr->rmt_ip, hdr->is_ip4);
+  conn->c_lcl_port = hdr->lcl_port;
+  conn->c_rmt_port = hdr->rmt_port;
+  conn->c_is_ip4 = hdr->is_ip4;
+
+  /* Remaining settings are inherited from the listener */
+  conn->c_fib_index = listener->c_fib_index;
+  conn->mss = listener->mss;
+  conn->flags |= UDP_CONN_F_CONNECTED;
+
+  if (!session_dgram_accept (&conn->connection, listener->c_s_index,
+                            listener->c_thread_index))
+    {
+      udp_connection_share_port (clib_net_to_host_u16
+                                (conn->c_lcl_port), conn->c_is_ip4);
+      return conn;
+    }
+
+  /* Accept failed: release the connection allocated above */
+  udp_connection_free (conn);
+  return 0;
+}
 
-             svm_fifo_enqueue_nowait (f0, udp_len0 - sizeof (*udp0),
-                                      (u8 *) (udp0 + 1));
+/*
+ * Enqueue the datagram described by hdr0 / b on session s0 while holding
+ * uc0->rx_lock.
+ *
+ * If the rx fifo cannot take hdr0->data_length plus one
+ * session_dgram_hdr_t, *error0 is set to UDP_ERROR_FIFO_FULL and nothing
+ * is enqueued; otherwise *error0 is left as the caller set it.
+ * queue_event selects whether an rx event is requested for the session.
+ */
+static void
+udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
+                       session_dgram_hdr_t * hdr0, u32 thread_index,
+                       vlib_buffer_t * b, u8 queue_event, u32 * error0)
+{
+  int wrote0;
 
-             b0->error = node->errors[SESSION_ERROR_ENQUEUED];
+  clib_spinlock_lock (&uc0->rx_lock);
 
-             /* We need to send an RX event on this fifo */
-             if (s0->enqueue_epoch != my_enqueue_epoch)
-               {
-                 s0->enqueue_epoch = my_enqueue_epoch;
+  if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
+      < hdr0->data_length + sizeof (session_dgram_hdr_t))
+    {
+      *error0 = UDP_ERROR_FIFO_FULL;
+      goto unlock_rx_lock;
+    }
 
-                 vec_add1 (smm->session_indices_to_enqueue_by_thread
-                           [my_thread_index],
-                           s0 - smm->sessions[my_thread_index]);
-               }
-           }
-         /* listener hit */
-         else if (s0->session_state == SESSION_STATE_LISTENING)
-           {
-             udp_connection_t *us;
-             int rv;
-
-             error0 = SESSION_ERROR_NOT_READY;
-
-             /*
-              * create udp transport session
-              */
-             pool_get (um->udp_sessions[my_thread_index], us);
-
-             us->mtu = 1024;   /* $$$$ policy */
-
-             us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32;
-             us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32;
-             us->c_lcl_port = udp0->dst_port;
-             us->c_rmt_port = udp0->src_port;
-             us->c_proto = SESSION_TYPE_IP4_UDP;
-             us->c_c_index = us - um->udp_sessions[my_thread_index];
-
-             /*
-              * create stream session and attach the udp session to it
-              */
-             rv = stream_session_accept (&us->connection, s0->session_index,
-                                         SESSION_TYPE_IP4_UDP,
-                                         1 /*notify */ );
-             if (rv)
-               error0 = rv;
+  /* If session is owned by another thread and rx event needed,
+   * enqueue event now while we still have the peeker lock */
+  if (s0->thread_index != thread_index)
+    {
+      /* Enqueue without asking for an event, then notify directly, and
+       * only when the fifo does not already have an event set */
+      wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
+                                                TRANSPORT_PROTO_UDP,
+                                                /* queue event */ 0);
+      if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
+       session_enqueue_notify (s0);
+    }
+  else
+    {
+      wrote0 = session_enqueue_dgram_connection (s0, hdr0, b,
+                                                TRANSPORT_PROTO_UDP,
+                                                queue_event);
+    }
+  /* Space was checked above, so the enqueue is expected to succeed */
+  ASSERT (wrote0 > 0);
 
-           }
-         else
-           {
+unlock_rx_lock:
 
-             error0 = SESSION_ERROR_NOT_READY;
-             goto trace0;
-           }
+  clib_spinlock_unlock (&uc0->rx_lock);
+}
 
-       trace0:
-         b0->error = node->errors[error0];
+/*
+ * Fill hdr from the IP and UDP headers that precede the buffer's current
+ * data pointer and look up the matching UDP session.
+ *
+ * hdr receives ports, addresses, address family, a zero data_offset and
+ * the payload length (IP length minus the fixed-size IP and UDP headers
+ * for ip4, payload_length minus the UDP header for ip6). The buffer's
+ * length fields are then rewritten from that payload length.
+ *
+ * Returns whatever session_lookup_safe4 / session_lookup_safe6 returns;
+ * the caller in this file treats a NULL result as "no listener".
+ */
+always_inline session_t *
+udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
+                            u8 is_ip4)
+{
+  udp_header_t *udp;
+  u32 fib_index;
+  session_t *s;
 
-         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
-           {
-             udp4_uri_input_trace_t *t =
-               vlib_add_trace (vm, node, b0, sizeof (*t));
-
-             t->session = ~0;
-             if (s0)
-               t->session = s0 - smm->sessions[my_thread_index];
-             t->disposition = error0;
-             t->thread_index = my_thread_index;
-           }
+  /* udp_local hands us a pointer to the udp data */
+  udp = (udp_header_t *) (vlib_buffer_get_current (b) - sizeof (*udp));
+  fib_index = vnet_buffer (b)->ip.fib_index;
 
-         /* verify speculative enqueue, maybe switch current next frame */
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next,
-                                          bi0, next0);
-       }
+  hdr->data_offset = 0;
+  hdr->lcl_port = udp->dst_port;
+  hdr->rmt_port = udp->src_port;
+  hdr->is_ip4 = is_ip4;
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+  if (is_ip4)
+    {
+      ip4_header_t *ip4;
+
+      /* TODO: must fix once udp_local does ip options correctly */
+      /* The ip4 header is assumed to sit sizeof (ip4_header_t) bytes
+       * before the udp header, i.e. without options */
+      ip4 = (ip4_header_t *) (((u8 *) udp) - sizeof (*ip4));
+      ip_set (&hdr->lcl_ip, &ip4->dst_address, 1);
+      ip_set (&hdr->rmt_ip, &ip4->src_address, 1);
+      hdr->data_length = clib_net_to_host_u16 (ip4->length);
+      hdr->data_length -= sizeof (ip4_header_t) + sizeof (udp_header_t);
+      s = session_lookup_safe4 (fib_index, &ip4->dst_address,
+                               &ip4->src_address, udp->dst_port,
+                               udp->src_port, TRANSPORT_PROTO_UDP);
+    }
+  else
+    {
+      ip6_header_t *ip60;
+
+      ip60 = (ip6_header_t *) (((u8 *) udp) - sizeof (*ip60));
+      ip_set (&hdr->lcl_ip, &ip60->dst_address, 0);
+      ip_set (&hdr->rmt_ip, &ip60->src_address, 0);
+      hdr->data_length = clib_net_to_host_u16 (ip60->payload_length);
+      hdr->data_length -= sizeof (udp_header_t);
+      s = session_lookup_safe6 (fib_index, &ip60->dst_address,
+                               &ip60->src_address, udp->dst_port,
+                               udp->src_port, TRANSPORT_PROTO_UDP);
     }
 
-  /* Send enqueue events */
+  /* Rewrite the buffer length from the payload length: single buffers get
+   * current_length, chained ones get the length beyond the first buffer */
+  if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_NEXT_PRESENT)))
+    b->current_length = hdr->data_length;
+  else
+    b->total_length_not_including_first_buffer = hdr->data_length
+      - b->current_length;
 
-  session_indices_to_enqueue =
-    smm->session_indices_to_enqueue_by_thread[my_thread_index];
+  return s;
+}
 
-  for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
-    {
-      session_fifo_event_t evt;
-      unix_shared_memory_queue_t *q;
-      stream_session_t *s0;
-      application_t *server0;
+/*
+ * Common body of the udp4-input and udp6-input nodes.
+ *
+ * For every buffer in frame: parse the headers and look up the session,
+ * then, depending on the session state, enqueue the datagram on an
+ * opened, ready or listening session (accepting a new connection for
+ * connected listeners) or record an error. Per-error counts are
+ * accumulated locally and stored into the node counters once at the end.
+ * All buffers of the frame are freed here, pending enqueue events are
+ * flushed, and the number of vectors in the frame is returned.
+ */
+always_inline uword
+udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+                   vlib_frame_t * frame, u8 is_ip4)
+{
+  u32 n_left_from, *from, errors, *first_buffer;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  u16 err_counters[UDP_N_ERROR] = { 0 };
+  u32 thread_index = vm->thread_index;
 
-      /* Get session */
-      s0 = pool_elt_at_index (smm->sessions[my_thread_index],
-                             session_indices_to_enqueue[i]);
+  from = first_buffer = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  vlib_get_buffers (vm, from, bufs, n_left_from);
 
-      /* Get session's server */
-      server0 = application_get (s0->app_index);
+  b = bufs;
 
-      /* Built-in server? Deliver the goods... */
-      if (server0->cb_fns.builtin_server_rx_callback)
-       {
-         server0->cb_fns.builtin_server_rx_callback (s0);
-         continue;
-       }
+  while (n_left_from > 0)
+    {
+      u32 error0 = UDP_ERROR_ENQUEUED;
+      session_dgram_hdr_t hdr0;
+      udp_connection_t *uc0;
+      session_t *s0;
 
-      if (svm_fifo_set_event (s0->server_rx_fifo))
+      s0 = udp_parse_and_lookup_buffer (b[0], &hdr0, is_ip4);
+      if (PREDICT_FALSE (!s0))
        {
-         /* Fabricate event */
-         evt.fifo = s0->server_rx_fifo;
-         evt.event_type = FIFO_EVENT_APP_RX;
-         evt.event_id = serial_number++;
+         error0 = UDP_ERROR_NO_LISTENER;
+         goto done;
+       }
 
-         /* Add event to server's event queue */
-         q = server0->event_queue;
+      /*
+       * If session exists pool peeker lock is taken at this point unless
+       * the session is already on the right thread or is a listener
+       */
 
-         /* Don't block for lack of space */
-         if (PREDICT_TRUE (q->cursize < q->maxsize))
+      if (s0->session_state == SESSION_STATE_OPENED)
+       {
+         u8 queue_event = 1;
+         uc0 = udp_connection_from_transport (session_get_transport (s0));
+         if (uc0->flags & UDP_CONN_F_CONNECTED)
            {
-             unix_shared_memory_queue_add (server0->event_queue,
-                                           (u8 *) & evt,
-                                           0 /* do wait for mutex */ );
+             if (s0->thread_index != thread_index)
+               {
+                 /*
+                  * Clone the transport. It will be cleaned up with the
+                  * session once we notify the session layer.
+                  */
+                 uc0 = udp_connection_clone_safe (s0->connection_index,
+                                                  s0->thread_index);
+                 ASSERT (s0->session_index == uc0->c_s_index);
+
+                 /*
+                  * Drop the peeker lock on pool resize and ask session
+                  * layer for a new session.
+                  */
+                 session_pool_remove_peeker (s0->thread_index);
+                 session_dgram_connect_notify (&uc0->connection,
+                                               s0->thread_index, &s0);
+                 queue_event = 0;
+               }
+             else
+               s0->session_state = SESSION_STATE_READY;
            }
-         else
+         udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
+                                 queue_event, &error0);
+         session_pool_remove_peeker (s0->thread_index);
+       }
+      else if (s0->session_state == SESSION_STATE_READY)
+       {
+         uc0 = udp_connection_from_transport (session_get_transport (s0));
+         udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
+                                 &error0);
+       }
+      else if (s0->session_state == SESSION_STATE_LISTENING)
+       {
+         uc0 = udp_connection_from_transport (session_get_transport (s0));
+         if (uc0->flags & UDP_CONN_F_CONNECTED)
            {
-             vlib_node_increment_counter (vm, udp4_uri_input_node.index,
-                                          SESSION_ERROR_FIFO_FULL, 1);
+             uc0 = udp_connection_accept (uc0, &hdr0, thread_index);
+             if (!uc0)
+               {
+                 error0 = UDP_ERROR_CREATE_SESSION;
+                 goto done;
+               }
+             s0 = session_get (uc0->c_s_index, uc0->c_thread_index);
+             error0 = UDP_ERROR_ACCEPT;
            }
+         udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
+                                 &error0);
        }
-      /* *INDENT-OFF* */
-      if (1)
+      else
        {
-         ELOG_TYPE_DECLARE (e) =
-         {
-             .format = "evt-enqueue: id %d length %d",
-             .format_args = "i4i4",};
-         struct
-         {
-           u32 data[2];
-         } *ed;
-         ed = ELOG_DATA (&vlib_global_main.elog_main, e);
-         ed->data[0] = evt.event_id;
-         ed->data[1] = svm_fifo_max_dequeue (s0->server_rx_fifo);
+         error0 = UDP_ERROR_NOT_READY;
+         session_pool_remove_peeker (s0->thread_index);
        }
-      /* *INDENT-ON* */
 
-    }
+    done:
+
+      /* Trace while b[0] is still the buffer just processed: advancing b
+       * first would trace the next buffer and, on the last iteration,
+       * read past the buffers fetched for this frame */
+      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+       udp_trace_buffer (vm, node, b[0], s0, error0);
 
-  vec_reset_length (session_indices_to_enqueue);
+      udp_inc_err_counter (err_counters, error0, 1);
 
-  smm->session_indices_to_enqueue_by_thread[my_thread_index] =
-    session_indices_to_enqueue;
+      b += 1;
+      n_left_from -= 1;
+    }
 
+  vlib_buffer_free (vm, first_buffer, frame->n_vectors);
+  errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
+                                             thread_index);
+  err_counters[UDP_ERROR_MQ_FULL] = errors;
+  udp_store_err_counters (vm, is_ip4, err_counters);
   return frame->n_vectors;
 }
 
-VLIB_REGISTER_NODE (udp4_uri_input_node) =
+/* Node function of udp4_input_node: runs the shared input path with
+ * is_ip4 = 1 and returns its result. */
+static uword
+udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
 {
-  .function = udp4_uri_input_node_fn,.name = "udp4-uri-input",.vector_size =
-    sizeof (u32),.format_trace = format_udp4_uri_input_trace,.type =
-    VLIB_NODE_TYPE_INTERNAL,.n_errors =
-    ARRAY_LEN (udp4_uri_input_error_strings),.error_strings =
-    udp4_uri_input_error_strings,.n_next_nodes = UDP4_URI_INPUT_N_NEXT,
-    /* edit / add dispositions here */
-    .next_nodes =
-  {
-  [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",}
-,};
+  return udp46_input_inline (vm, node, frame, 1);
+}
+
+/* Registration of the "udp4-input" internal node: it runs udp4_input,
+ * formats traces with format_udp_input_trace, takes its error strings from
+ * udp_error_strings and its next nodes from foreach_udp_input_next. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_input_node) =
+{
+  .function = udp4_input,
+  .name = "udp4-input",
+  .vector_size = sizeof (u32),
+  .format_trace = format_udp_input_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (udp_error_strings),
+  .error_strings = udp_error_strings,
+  .n_next_nodes = UDP_INPUT_N_NEXT,
+  .next_nodes = {
+#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
+      foreach_udp_input_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+/* Node function of udp6_input_node: runs the shared input path with
+ * is_ip4 = 0 and returns its result. */
+static uword
+udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+           vlib_frame_t * frame)
+{
+  return udp46_input_inline (vm, node, frame, 0);
+}
+
+/* Registration of the "udp6-input" internal node: it runs udp6_input and
+ * otherwise uses the same trace formatter, error strings and next nodes
+ * as the "udp4-input" registration. */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp6_input_node) =
+{
+  .function = udp6_input,
+  .name = "udp6-input",
+  .vector_size = sizeof (u32),
+  .format_trace = format_udp_input_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (udp_error_strings),
+  .error_strings = udp_error_strings,
+  .n_next_nodes = UDP_INPUT_N_NEXT,
+  .next_nodes = {
+#define _(s, n) [UDP_INPUT_NEXT_##s] = n,
+      foreach_udp_input_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
 
 /*
  * fd.io coding-style-patch-verification: ON