/*
- * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Copyright (c) 2016-2019 Cisco and/or its affiliates.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at:
* limitations under the License.
*/
+#include <vlibmemory/api.h>
#include <vlib/vlib.h>
-#include <vnet/vnet.h>
-#include <vnet/pg/pg.h>
-#include <vnet/ip/ip.h>
-#include <vnet/udp/udp.h>
#include <vppinfra/hash.h>
#include <vppinfra/error.h>
#include <vppinfra/elog.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ip/ip.h>
+#include <vnet/udp/udp.h>
#include <vnet/udp/udp_packet.h>
+#include <vnet/session/session.h>
-#include <vlibmemory/api.h>
-#include "../session/application_interface.h"
-
-vlib_node_registration_t udp4_uri_input_node;
+/* Text for the UDP error counters: every udp_error (sym, text) entry of
+ * udp_error.def is expanded to its text argument, in file order. Used as
+ * .error_strings by the UDP input node registrations. */
+static char *udp_error_strings[] = {
+#define udp_error(sym, text) text,
+#include "udp_error.def"
+#undef udp_error
+};
typedef struct
{
- u32 session;
+ u32 connection;	/* connection_index of the matched session, ~0 if none */
u32 disposition;
u32 thread_index;
-} udp4_uri_input_trace_t;
+} udp_input_trace_t;	/* per-packet trace record written by udp_trace_buffer () */
/* packet trace format function */
static u8 *
-format_udp4_uri_input_trace (u8 * s, va_list * args)
+format_udp_input_trace (u8 * s, va_list * args)
{
CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
- udp4_uri_input_trace_t *t = va_arg (*args, udp4_uri_input_trace_t *);
+  /* Third argument is the udp_input_trace_t record to print */
+  udp_input_trace_t *rec = va_arg (*args, udp_input_trace_t *);
- s = format (s, "UDP4_URI_INPUT: session %d, disposition %d, thread %d",
- t->session, t->disposition, t->thread_index);
+
+  s = format (s, "UDP_INPUT: connection %d, disposition %d, thread %d",
+	      rec->connection, rec->disposition, rec->thread_index);
return s;
}
+/* Next nodes of the UDP input nodes, as _ (enum suffix, node name) pairs.
+ * The same list generates the enum below and the .next_nodes tables. */
+#define foreach_udp_input_next			\
+  _ (DROP, "error-drop")
+
typedef enum
{
- UDP4_URI_INPUT_NEXT_DROP,
- UDP4_URI_INPUT_N_NEXT,
-} udp4_uri_input_next_t;
-
-static char *udp4_uri_input_error_strings[] = {
-#define _(sym,string) string,
- foreach_session_input_error
+#define _(sym, node_name) UDP_INPUT_NEXT_##sym,
+  foreach_udp_input_next
#undef _
-};
+  UDP_INPUT_N_NEXT,		/* number of next nodes */
+} udp_input_next_t;
-static uword
-udp4_uri_input_node_fn (vlib_main_t * vm,
- vlib_node_runtime_t * node, vlib_frame_t * frame)
+/**
+ * Add val to counter evt of the UDP input node for the given address family.
+ *
+ * @param vm	  vlib main passed through to vlib_node_increment_counter ()
+ * @param is_ip4  non-zero selects udp4_input_node, zero udp6_input_node
+ * @param evt	  counter (UDP error) index
+ * @param val	  increment. Kept 32 bits wide: udp_store_err_counters ()
+ *		  passes u16 per-frame totals, and a u8 parameter would
+ *		  silently truncate them (e.g. 256 would be counted as 0).
+ */
+always_inline void
+udp_input_inc_counter (vlib_main_t * vm, u8 is_ip4, u8 evt, u32 val)
{
- u32 n_left_from, *from, *to_next;
- udp4_uri_input_next_t next_index;
- udp_uri_main_t *um = vnet_get_udp_main ();
- session_manager_main_t *smm = vnet_get_session_manager_main ();
- u32 my_thread_index = vm->thread_index;
- u8 my_enqueue_epoch;
- u32 *session_indices_to_enqueue;
- static u32 serial_number;
- int i;
-
- my_enqueue_epoch = ++smm->current_enqueue_epoch[my_thread_index];
-
- from = vlib_frame_vector_args (frame);
- n_left_from = frame->n_vectors;
- next_index = node->cached_next_index;
+  if (is_ip4)
+    vlib_node_increment_counter (vm, udp4_input_node.index, evt, val);
+  else
+    vlib_node_increment_counter (vm, udp6_input_node.index, evt, val);
+}
- while (n_left_from > 0)
- {
- u32 n_left_to_next;
+/*
+ * Store a local array of UDP_N_ERROR per-error totals into the counters of
+ * the udp4/udp6 input node, skipping totals that are zero.
+ *
+ * Wrapped in do { } while (0) so the macro is a single statement that can be
+ * followed by ';' anywhere, including an unbraced if/else. Arguments are
+ * parenthesized and the loop index is named _i so it cannot capture a caller
+ * variable that appears in the arguments.
+ */
+#define udp_store_err_counters(vm, is_ip4, cnts)			\
+do {									\
+  int _i;								\
+  for (_i = 0; _i < UDP_N_ERROR; _i++)					\
+    if ((cnts)[_i])							\
+      udp_input_inc_counter ((vm), (is_ip4), _i, (cnts)[_i]);		\
+} while (0)
- vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+/*
+ * Add val to the local total kept for error err in the array cnts.
+ * Wrapped in do { } while (0) so the macro is a single statement; the
+ * arguments are parenthesized so expressions can be passed safely.
+ */
+#define udp_inc_err_counter(cnts, err, val)	\
+do {						\
+  (cnts)[(err)] += (val);			\
+} while (0)
- while (n_left_from > 0 && n_left_to_next > 0)
- {
- u32 bi0;
- vlib_buffer_t *b0;
- u32 next0 = UDP4_URI_INPUT_NEXT_DROP;
- u32 error0 = SESSION_ERROR_ENQUEUED;
- udp_header_t *udp0;
- ip4_header_t *ip0;
- stream_session_t *s0;
- svm_fifo_t *f0;
- u16 udp_len0;
- u8 *data0;
-
- /* speculatively enqueue b0 to the current next frame */
- bi0 = from[0];
- to_next[0] = bi0;
- from += 1;
- to_next += 1;
- n_left_from -= 1;
- n_left_to_next -= 1;
-
- b0 = vlib_get_buffer (vm, bi0);
-
- /* udp_local hands us a pointer to the udp data */
-
- data0 = vlib_buffer_get_current (b0);
- udp0 = (udp_header_t *) (data0 - sizeof (*udp0));
-
- /* $$$$ fixme: udp_local doesn't do ip options correctly anyhow */
- ip0 = (ip4_header_t *) (((u8 *) udp0) - sizeof (*ip0));
- s0 = 0;
-
- /* lookup session */
- s0 = stream_session_lookup4 (&ip0->dst_address, &ip0->src_address,
- udp0->dst_port, udp0->src_port,
- SESSION_TYPE_IP4_UDP, my_thread_index);
-
- /* no listener */
- if (PREDICT_FALSE (s0 == 0))
- {
- error0 = SESSION_ERROR_NO_LISTENER;
- goto trace0;
- }
+/**
+ * Add a udp_input_trace_t record for buffer b, if b is flagged as traced.
+ *
+ * @param vm	  vlib main; also supplies the thread index when s is NULL
+ * @param node	  node runtime the trace is added for
+ * @param b	  buffer to trace
+ * @param s	  session the packet was matched to, or NULL when the lookup
+ *		  found none (connection is then recorded as ~0)
+ * @param error0  disposition (UDP error code) recorded for the packet
+ */
+static void
+udp_trace_buffer (vlib_main_t * vm, vlib_node_runtime_t * node,
+		  vlib_buffer_t * b, session_t * s, u16 error0)
+{
+  udp_input_trace_t *t;
- f0 = s0->server_rx_fifo;
+
+  if (PREDICT_TRUE (!(b->flags & VLIB_BUFFER_IS_TRACED)))
+    return;
- /* established hit */
- if (PREDICT_TRUE (s0->session_state == SESSION_STATE_READY))
- {
- udp_len0 = clib_net_to_host_u16 (udp0->length);
+
+  t = vlib_add_trace (vm, node, b, sizeof (*t));
+  t->connection = s ? s->connection_index : ~0;
+  t->disposition = error0;
+  /* s is NULL on the no-listener path: never dereference it unguarded */
+  t->thread_index = s ? s->thread_index : vm->thread_index;
+}
- if (PREDICT_FALSE (udp_len0 > svm_fifo_max_enqueue (f0)))
- {
- error0 = SESSION_ERROR_FIFO_FULL;
- goto trace0;
- }
+/**
+ * Create a connected UDP connection on thread_index for a datagram that
+ * arrived on a listener.
+ *
+ * Addresses, ports and address family are taken from the datagram header;
+ * fib index and mss are inherited from the listener. The new connection is
+ * then handed to session_dgram_accept ().
+ *
+ * @return the new connection, or 0 if session_dgram_accept () failed (the
+ *	   connection is freed in that case)
+ */
+static udp_connection_t *
+udp_connection_accept (udp_connection_t * listener, session_dgram_hdr_t * hdr,
+		       u32 thread_index)
+{
+  udp_connection_t *conn = udp_connection_alloc (thread_index);
+  int rv;
+
+  /* Endpoints as seen in the received datagram */
+  ip_copy (&conn->c_lcl_ip, &hdr->lcl_ip, hdr->is_ip4);
+  ip_copy (&conn->c_rmt_ip, &hdr->rmt_ip, hdr->is_ip4);
+  conn->c_lcl_port = hdr->lcl_port;
+  conn->c_rmt_port = hdr->rmt_port;
+  conn->c_is_ip4 = hdr->is_ip4;
+
+  /* Properties inherited from the listener */
+  conn->c_fib_index = listener->c_fib_index;
+  conn->mss = listener->mss;
+  conn->flags |= UDP_CONN_F_CONNECTED;
+
+  rv = session_dgram_accept (&conn->connection, listener->c_s_index,
+			     listener->c_thread_index);
+  if (rv)
+    {
+      udp_connection_free (conn);
+      return 0;
+    }
+
+  udp_connection_share_port (clib_net_to_host_u16 (conn->c_lcl_port),
+			     conn->c_is_ip4);
+  return conn;
+}
- svm_fifo_enqueue_nowait (f0, udp_len0 - sizeof (*udp0),
- (u8 *) (udp0 + 1));
+/**
+ * Enqueue one datagram to session s0's rx fifo while holding uc0->rx_lock.
+ *
+ * If the fifo cannot take the datagram header plus payload, *error0 is set
+ * to UDP_ERROR_FIFO_FULL and nothing is enqueued. Otherwise the datagram is
+ * enqueued; when the session belongs to another thread the rx notification
+ * is sent from here instead of being queued by the enqueue call.
+ *
+ * @param thread_index	thread the caller is running on
+ * @param queue_event	non-zero if an rx event should be generated
+ * @param error0	in/out disposition, only written on fifo full
+ */
+static void
+udp_connection_enqueue (udp_connection_t * uc0, session_t * s0,
+			session_dgram_hdr_t * hdr0, u32 thread_index,
+			vlib_buffer_t * b, u8 queue_event, u32 * error0)
+{
+  int n_written;
- b0->error = node->errors[SESSION_ERROR_ENQUEUED];
+
+  clib_spinlock_lock (&uc0->rx_lock);
- /* We need to send an RX event on this fifo */
- if (s0->enqueue_epoch != my_enqueue_epoch)
- {
- s0->enqueue_epoch = my_enqueue_epoch;
+
+  if (svm_fifo_max_enqueue_prod (s0->rx_fifo)
+      < hdr0->data_length + sizeof (session_dgram_hdr_t))
+    {
+      /* No room for the datagram header plus payload */
+      *error0 = UDP_ERROR_FIFO_FULL;
+    }
- vec_add1 (smm->session_indices_to_enqueue_by_thread
- [my_thread_index],
- s0 - smm->sessions[my_thread_index]);
- }
- }
- /* listener hit */
- else if (s0->session_state == SESSION_STATE_LISTENING)
- {
- udp_connection_t *us;
- int rv;
-
- error0 = SESSION_ERROR_NOT_READY;
-
- /*
- * create udp transport session
- */
- pool_get (um->udp_sessions[my_thread_index], us);
-
- us->mtu = 1024; /* $$$$ policy */
-
- us->c_lcl_ip4.as_u32 = ip0->dst_address.as_u32;
- us->c_rmt_ip4.as_u32 = ip0->src_address.as_u32;
- us->c_lcl_port = udp0->dst_port;
- us->c_rmt_port = udp0->src_port;
- us->c_proto = SESSION_TYPE_IP4_UDP;
- us->c_c_index = us - um->udp_sessions[my_thread_index];
-
- /*
- * create stream session and attach the udp session to it
- */
- rv = stream_session_accept (&us->connection, s0->session_index,
- SESSION_TYPE_IP4_UDP,
- 1 /*notify */ );
- if (rv)
- error0 = rv;
+  else
+    {
+      if (s0->thread_index == thread_index)
+	{
+	  n_written = session_enqueue_dgram_connection (s0, hdr0, b,
+							TRANSPORT_PROTO_UDP,
+							queue_event);
+	}
+      else
+	{
+	  /* If session is owned by another thread and rx event needed,
+	   * enqueue event now while we still have the peeker lock */
+	  n_written = session_enqueue_dgram_connection (s0, hdr0, b,
+							TRANSPORT_PROTO_UDP,
+							/* queue event */ 0);
+	  if (queue_event && !svm_fifo_has_event (s0->rx_fifo))
+	    session_enqueue_notify (s0);
+	}
+      ASSERT (n_written > 0);
+    }
- }
- else
- {
+
- error0 = SESSION_ERROR_NOT_READY;
- goto trace0;
- }
+  clib_spinlock_unlock (&uc0->rx_lock);
+}
- trace0:
- b0->error = node->errors[error0];
+/**
+ * Fill the datagram header from a received packet and look up its session.
+ *
+ * Ports, addresses, address family and payload length are copied from the
+ * UDP and IP headers that precede the buffer's current data pointer; the
+ * buffer length fields are then set from the computed payload length.
+ *
+ * @param b	  buffer whose current data points at the UDP payload
+ * @param hdr	  datagram header to fill in
+ * @param is_ip4  non-zero for IPv4, zero for IPv6
+ * @return the session returned by the lookup; callers treat 0 as "no
+ *	   listener"
+ */
+always_inline session_t *
+udp_parse_and_lookup_buffer (vlib_buffer_t * b, session_dgram_hdr_t * hdr,
+			     u8 is_ip4)
+{
+  udp_header_t *uh;
+  session_t *sess;
+  u32 fib_index;
- if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
- && (b0->flags & VLIB_BUFFER_IS_TRACED)))
- {
- udp4_uri_input_trace_t *t =
- vlib_add_trace (vm, node, b0, sizeof (*t));
-
- t->session = ~0;
- if (s0)
- t->session = s0 - smm->sessions[my_thread_index];
- t->disposition = error0;
- t->thread_index = my_thread_index;
- }
+
+  /* udp_local hands us a pointer to the udp data */
+  uh = (udp_header_t *) (vlib_buffer_get_current (b) - sizeof (udp_header_t));
+  fib_index = vnet_buffer (b)->ip.fib_index;
- /* verify speculative enqueue, maybe switch current next frame */
- vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
- to_next, n_left_to_next,
- bi0, next0);
- }
+
+  hdr->is_ip4 = is_ip4;
+  hdr->lcl_port = uh->dst_port;
+  hdr->rmt_port = uh->src_port;
+  hdr->data_offset = 0;
- vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+
+  if (is_ip4)
+    {
+      ip4_header_t *ih4;
+
+      /* TODO: must fix once udp_local does ip options correctly */
+      ih4 = (ip4_header_t *) (((u8 *) uh) - sizeof (ip4_header_t));
+      ip_set (&hdr->lcl_ip, &ih4->dst_address, 1);
+      ip_set (&hdr->rmt_ip, &ih4->src_address, 1);
+      /* IPv4 length covers the IP and UDP headers: strip both */
+      hdr->data_length = clib_net_to_host_u16 (ih4->length);
+      hdr->data_length -= sizeof (ip4_header_t) + sizeof (udp_header_t);
+      sess = session_lookup_safe4 (fib_index, &ih4->dst_address,
+				   &ih4->src_address, uh->dst_port,
+				   uh->src_port, TRANSPORT_PROTO_UDP);
+    }
+  else
+    {
+      ip6_header_t *ih6;
+
+      ih6 = (ip6_header_t *) (((u8 *) uh) - sizeof (ip6_header_t));
+      ip_set (&hdr->lcl_ip, &ih6->dst_address, 0);
+      ip_set (&hdr->rmt_ip, &ih6->src_address, 0);
+      /* IPv6 payload length excludes the IP header: strip UDP only */
+      hdr->data_length = clib_net_to_host_u16 (ih6->payload_length);
+      hdr->data_length -= sizeof (udp_header_t);
+      sess = session_lookup_safe6 (fib_index, &ih6->dst_address,
+				   &ih6->src_address, uh->dst_port,
+				   uh->src_port, TRANSPORT_PROTO_UDP);
}
- /* Send enqueue events */
+
+  /* Make the buffer lengths agree with the payload length just computed */
+  if (PREDICT_FALSE (b->flags & VLIB_BUFFER_NEXT_PRESENT))
+    b->total_length_not_including_first_buffer = hdr->data_length
+      - b->current_length;
+  else
+    b->current_length = hdr->data_length;
- session_indices_to_enqueue =
- smm->session_indices_to_enqueue_by_thread[my_thread_index];
+
+  return sess;
+}
- for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
- {
- session_fifo_event_t evt;
- unix_shared_memory_queue_t *q;
- stream_session_t *s0;
- application_t *server0;
+/**
+ * Shared worker for the udp4-input and udp6-input nodes.
+ *
+ * For every buffer of the frame: parse the headers and look up the session,
+ * then, depending on the session state, enqueue the datagram to the matching
+ * connection (cloning or accepting a connection first when required). Once
+ * the whole frame has been processed all buffers are freed, pending enqueue
+ * events are flushed and the per-error totals are stored in the node
+ * counters.
+ *
+ * @param is_ip4  non-zero when called for udp4-input, zero for udp6-input
+ * @return number of buffers in the frame
+ */
+always_inline uword
+udp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node,
+		    vlib_frame_t * frame, u8 is_ip4)
+{
+  u32 n_left_from, *from, errors, *first_buffer;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  u16 err_counters[UDP_N_ERROR] = { 0 };
+  u32 thread_index = vm->thread_index;
- /* Get session */
- s0 = pool_elt_at_index (smm->sessions[my_thread_index],
- session_indices_to_enqueue[i]);
+
+  from = first_buffer = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  vlib_get_buffers (vm, from, bufs, n_left_from);
- /* Get session's server */
- server0 = application_get (s0->app_index);
+
+  b = bufs;
- /* Built-in server? Deliver the goods... */
- if (server0->cb_fns.builtin_server_rx_callback)
- {
- server0->cb_fns.builtin_server_rx_callback (s0);
- continue;
- }
+
+  while (n_left_from > 0)
+    {
+      u32 error0 = UDP_ERROR_ENQUEUED;
+      session_dgram_hdr_t hdr0;
+      udp_connection_t *uc0;
+      session_t *s0;
- if (svm_fifo_set_event (s0->server_rx_fifo))
+
+      s0 = udp_parse_and_lookup_buffer (b[0], &hdr0, is_ip4);
+      if (PREDICT_FALSE (!s0))
{
- /* Fabricate event */
- evt.fifo = s0->server_rx_fifo;
- evt.event_type = FIFO_EVENT_APP_RX;
- evt.event_id = serial_number++;
+	  error0 = UDP_ERROR_NO_LISTENER;
+	  goto done;
+	}
- /* Add event to server's event queue */
- q = server0->event_queue;
+
+      /*
+       * If session exists pool peeker lock is taken at this point unless
+       * the session is already on the right thread or is a listener
+       */
- /* Don't block for lack of space */
- if (PREDICT_TRUE (q->cursize < q->maxsize))
+
+      if (s0->session_state == SESSION_STATE_OPENED)
+	{
+	  u8 queue_event = 1;
+	  uc0 = udp_connection_from_transport (session_get_transport (s0));
+	  if (uc0->flags & UDP_CONN_F_CONNECTED)
{
- unix_shared_memory_queue_add (server0->event_queue,
- (u8 *) & evt,
- 0 /* do wait for mutex */ );
+	      if (s0->thread_index != thread_index)
+		{
+		  /*
+		   * Clone the transport. It will be cleaned up with the
+		   * session once we notify the session layer.
+		   */
+		  uc0 = udp_connection_clone_safe (s0->connection_index,
+						   s0->thread_index);
+		  ASSERT (s0->session_index == uc0->c_s_index);
+
+		  /*
+		   * Drop the peeker lock on pool resize and ask session
+		   * layer for a new session.
+		   */
+		  session_pool_remove_peeker (s0->thread_index);
+		  session_dgram_connect_notify (&uc0->connection,
+						s0->thread_index, &s0);
+		  queue_event = 0;
+		}
+	      else
+		s0->session_state = SESSION_STATE_READY;
}
- else
+	  udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0],
+				  queue_event, &error0);
+	  session_pool_remove_peeker (s0->thread_index);
+	}
+      else if (s0->session_state == SESSION_STATE_READY)
+	{
+	  uc0 = udp_connection_from_transport (session_get_transport (s0));
+	  udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
+				  &error0);
+	}
+      else if (s0->session_state == SESSION_STATE_LISTENING)
+	{
+	  uc0 = udp_connection_from_transport (session_get_transport (s0));
+	  if (uc0->flags & UDP_CONN_F_CONNECTED)
{
- vlib_node_increment_counter (vm, udp4_uri_input_node.index,
- SESSION_ERROR_FIFO_FULL, 1);
+	      uc0 = udp_connection_accept (uc0, &hdr0, thread_index);
+	      if (!uc0)
+		{
+		  error0 = UDP_ERROR_CREATE_SESSION;
+		  goto done;
+		}
+	      s0 = session_get (uc0->c_s_index, uc0->c_thread_index);
+	      error0 = UDP_ERROR_ACCEPT;
}
+	  udp_connection_enqueue (uc0, s0, &hdr0, thread_index, b[0], 1,
+				  &error0);
}
- /* *INDENT-OFF* */
- if (1)
+      else
{
- ELOG_TYPE_DECLARE (e) =
- {
- .format = "evt-enqueue: id %d length %d",
- .format_args = "i4i4",};
- struct
- {
- u32 data[2];
- } *ed;
- ed = ELOG_DATA (&vlib_global_main.elog_main, e);
- ed->data[0] = evt.event_id;
- ed->data[1] = svm_fifo_max_dequeue (s0->server_rx_fifo);
+	  error0 = UDP_ERROR_NOT_READY;
+	  session_pool_remove_peeker (s0->thread_index);
}
- /* *INDENT-ON* */
- }
+    done:
+
+      /* Trace while b[0] is still the buffer just processed: once b has
+       * been advanced it points at the next packet, or past the entries
+       * filled in by vlib_get_buffers () after the last one */
+      if (PREDICT_FALSE (node->flags & VLIB_NODE_FLAG_TRACE))
+	udp_trace_buffer (vm, node, b[0], s0, error0);
+
+      b += 1;
+      n_left_from -= 1;
- vec_reset_length (session_indices_to_enqueue);
+      udp_inc_err_counter (err_counters, error0, 1);
- smm->session_indices_to_enqueue_by_thread[my_thread_index] =
- session_indices_to_enqueue;
+    }
+
+  /* Buffers are not forwarded to a next node; free the whole frame */
+  vlib_buffer_free (vm, first_buffer, frame->n_vectors);
+  errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_UDP,
+					      thread_index);
+  err_counters[UDP_ERROR_MQ_FULL] = errors;
+  udp_store_err_counters (vm, is_ip4, err_counters);
return frame->n_vectors;
}
-VLIB_REGISTER_NODE (udp4_uri_input_node) =
+static uword	/* node function of "udp4-input": shared worker, IPv4 mode */
+udp4_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
{
- .function = udp4_uri_input_node_fn,.name = "udp4-uri-input",.vector_size =
- sizeof (u32),.format_trace = format_udp4_uri_input_trace,.type =
- VLIB_NODE_TYPE_INTERNAL,.n_errors =
- ARRAY_LEN (udp4_uri_input_error_strings),.error_strings =
- udp4_uri_input_error_strings,.n_next_nodes = UDP4_URI_INPUT_N_NEXT,
- /* edit / add dispositions here */
- .next_nodes =
- {
- [UDP4_URI_INPUT_NEXT_DROP] = "error-drop",}
-,};
+ return udp46_input_inline (vm, node, frame, 1 /* is_ip4 */ );
+}
+
+/* Registration of the "udp4-input" graph node */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp4_input_node) =
+{
+  /* identity */
+  .name = "udp4-input",
+  .function = udp4_input,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .vector_size = sizeof (u32),
+  /* tracing */
+  .format_trace = format_udp_input_trace,
+  /* error counters shared with udp6-input */
+  .n_errors = ARRAY_LEN (udp_error_strings),
+  .error_strings = udp_error_strings,
+  /* next nodes, generated from foreach_udp_input_next */
+  .n_next_nodes = UDP_INPUT_N_NEXT,
+  .next_nodes = {
+#define _(sym, node_name) [UDP_INPUT_NEXT_##sym] = node_name,
+      foreach_udp_input_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+static uword	/* node function of "udp6-input": shared worker, IPv6 mode */
+udp6_input (vlib_main_t * vm, vlib_node_runtime_t * node,
+ vlib_frame_t * frame)
+{
+ return udp46_input_inline (vm, node, frame, 0 /* is_ip4 */ );
+}
+
+/* Registration of the "udp6-input" graph node */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (udp6_input_node) =
+{
+  /* identity */
+  .name = "udp6-input",
+  .function = udp6_input,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .vector_size = sizeof (u32),
+  /* tracing */
+  .format_trace = format_udp_input_trace,
+  /* error counters shared with udp4-input */
+  .n_errors = ARRAY_LEN (udp_error_strings),
+  .error_strings = udp_error_strings,
+  /* next nodes, generated from foreach_udp_input_next */
+  .n_next_nodes = UDP_INPUT_N_NEXT,
+  .next_nodes = {
+#define _(sym, node_name) [UDP_INPUT_NEXT_##sym] = node_name,
+      foreach_udp_input_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
/*
* fd.io coding-style-patch-verification: ON