2 * Copyright (c) 2017 Cisco and/or its affiliates.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at:
7 * http://www.apache.org/licenses/LICENSE-2.0
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
17 * @brief Session and session manager
20 #include <vnet/session/session.h>
21 #include <vnet/session/session_debug.h>
22 #include <vnet/session/application.h>
23 #include <vlibmemory/api.h>
24 #include <vnet/dpo/load_balance.h>
25 #include <vnet/fib/ip4_fib.h>
26 #include <vnet/tcp/tcp.h>
/* Global session manager state. Functions in this file take its address
 * directly (smm = &session_manager_main). */
28 session_manager_main_t session_manager_main;
/* Array of transport protocol function tables, defined in another
 * translation unit. This file indexes it by session type, for example
 * tp_vfts[st].open and tp_vfts[s->session_type].close. */
29 extern transport_proto_vft_t *tp_vfts;
/*
 * Allocate a stream session for transport connection tc and link the two.
 *
 * Visible steps: assert that tc->thread_index is the calling thread, take a
 * cache-line aligned element from that thread's session pool and zero it,
 * request rx/tx fifos from segment manager sm (the pool element is put back
 * when that request returns non-zero), point both fifos back at the session,
 * fill in session type, state and indices, cross-link session and transport,
 * and add the session handle to the lookup table for tc.
 *
 * NOTE(review): the return type, braces, several declarations and the
 * return statements are on lines not visible in this view, so the exact use
 * of alloc_fifos and ret_s and the returned values cannot be confirmed here.
 */
32 stream_session_create_i (segment_manager_t * sm, transport_connection_t * tc,
33 u8 alloc_fifos, stream_session_t ** ret_s)
35 session_manager_main_t *smm = &session_manager_main;
36 svm_fifo_t *server_rx_fifo = 0, *server_tx_fifo = 0;
37 u32 fifo_segment_index;
41 u32 thread_index = tc->thread_index;
/* The session is created in the pool of the thread that is running now */
44 ASSERT (thread_index == vlib_get_thread_index ());
46 /* Create the session */
47 pool_get_aligned (smm->sessions[thread_index], s, CLIB_CACHE_LINE_BYTES);
48 memset (s, 0, sizeof (*s));
/* Index of the new element: its offset from the start of the pool */
49 pool_index = s - smm->sessions[thread_index];
54 if ((rv = segment_manager_alloc_session_fifos (sm, &server_rx_fifo,
56 &fifo_segment_index)))
/* Fifo allocation reported an error: give the session element back */
58 pool_put (smm->sessions[thread_index], s);
61 /* Initialize backpointers */
62 server_rx_fifo->master_session_index = pool_index;
63 server_rx_fifo->master_thread_index = thread_index;
65 server_tx_fifo->master_session_index = pool_index;
66 server_tx_fifo->master_thread_index = thread_index;
68 s->server_rx_fifo = server_rx_fifo;
69 s->server_tx_fifo = server_tx_fifo;
70 s->svm_segment_index = fifo_segment_index;
73 /* Initialize state machine, such as it is... */
74 s->session_type = session_type_from_proto_and_ip (tc->transport_proto,
76 s->session_state = SESSION_STATE_CONNECTING;
77 s->thread_index = thread_index;
78 s->session_index = pool_index;
80 /* Attach transport to session */
81 s->connection_index = tc->c_index;
83 /* Attach session to transport */
84 tc->s_index = s->session_index;
86 /* Add to the main lookup table */
/* The value stored for tc is the session handle */
87 value = stream_session_handle (s);
88 stream_session_table_add_for_tc (tc, value);
95 /** Enqueue buffer chain tail */
/*
 * Walks the buffers chained after b, starting at b->next_buffer, and copies
 * the current data of each one into the rx fifo of session s. Two enqueue
 * calls are visible: svm_fifo_enqueue_nowait and
 * svm_fifo_enqueue_with_offset, the latter taking the offset argument. The
 * loop continues while the current buffer has VLIB_BUFFER_NEXT_PRESENT set.
 *
 * NOTE(review): the branch on is_in_order and the accounting of written
 * bytes and offset are on lines not visible in this view; presumably the
 * in-order path uses the nowait call and the out-of-order path the offset
 * call - confirm.
 */
97 session_enqueue_chain_tail (stream_session_t * s, vlib_buffer_t * b,
98 u32 offset, u8 is_in_order)
100 vlib_buffer_t *chain_b;
101 u32 chain_bi = b->next_buffer;
102 vlib_main_t *vm = vlib_get_main ();
109 chain_b = vlib_get_buffer (vm, chain_bi);
110 data = vlib_buffer_get_current (chain_b);
111 len = chain_b->current_length;
114 rv = svm_fifo_enqueue_nowait (s->server_rx_fifo, len, data);
/* A positive rv is added to the running total; otherwise the total so far
 * is returned unchanged */
117 return (rv > 0) ? (written + rv) : written;
123 rv = svm_fifo_enqueue_with_offset (s->server_rx_fifo, offset, len,
/* Advance to the next chained buffer index, or 0 to end the loop */
130 while ((chain_bi = (chain_b->flags & VLIB_BUFFER_NEXT_PRESENT)
131 ? chain_b->next_buffer : 0));
140 * Enqueue data for delivery to session peer. Does not notify peer of enqueue
141 * event but on request can queue notification events for later delivery by
142 * calling session_manager_flush_enqueue_events().
144 * @param tc Transport connection for which data is to be enqueued
145 * @param b Buffer to be enqueued
146 * @param offset Offset at which to start enqueueing if out-of-order
147 * @param queue_event Flag to indicate if peer is to be notified or if event
148 * is to be queued. The former is useful when more data is
149 * enqueued and only one event is to be generated.
150 * @param is_in_order Flag to indicate if data is in order
151 * @return Number of bytes enqueued or a negative value if enqueueing failed.
/*
 * Copies the data of buffer b into the rx fifo of the session attached to
 * tc. Two enqueue paths are visible:
 *  - svm_fifo_enqueue_nowait for the head buffer, followed by
 *    session_enqueue_chain_tail (s, b, 0, 1) when the buffer is chained and
 *    something was enqueued;
 *  - svm_fifo_enqueue_with_offset at offset, followed by
 *    session_enqueue_chain_tail with is_in_order 0 when the buffer is
 *    chained and the offset enqueue returned 0.
 * Afterwards the session index may be appended to the per-thread vector of
 * sessions awaiting an enqueue notification.
 *
 * NOTE(review): the conditions selecting between the two paths and guarding
 * the event queueing (presumably is_in_order and queue_event), as well as
 * the return statements, are on lines not visible in this view - confirm.
 */
154 stream_session_enqueue_data (transport_connection_t * tc, vlib_buffer_t * b,
155 u32 offset, u8 queue_event, u8 is_in_order)
158 int enqueued = 0, rv;
160 s = stream_session_get (tc->s_index, tc->thread_index);
165 svm_fifo_enqueue_nowait (s->server_rx_fifo, b->current_length,
166 vlib_buffer_get_current (b));
168 ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && enqueued > 0))
170 rv = session_enqueue_chain_tail (s, b, 0, 1);
178 rv = svm_fifo_enqueue_with_offset (s->server_rx_fifo, offset,
180 vlib_buffer_get_current (b));
/* The chained tail is placed right after the head buffer data */
181 if (PREDICT_FALSE ((b->flags & VLIB_BUFFER_NEXT_PRESENT) && !rv))
182 rv = session_enqueue_chain_tail (s, b, offset + b->current_length, 0);
189 /* Queue RX event on this fifo. Eventually these will need to be flushed
190 * by calling session_manager_flush_enqueue_events () */
191 session_manager_main_t *smm = vnet_get_session_manager_main ();
192 u32 thread_index = s->thread_index;
193 u32 my_enqueue_epoch = smm->current_enqueue_epoch[thread_index];
/* A session already stamped with the current epoch is not listed again */
195 if (s->enqueue_epoch != my_enqueue_epoch)
197 s->enqueue_epoch = my_enqueue_epoch;
198 vec_add1 (smm->session_indices_to_enqueue_by_thread[thread_index],
199 s - smm->sessions[thread_index]);
209 /** Check if we have space in rx fifo to push more bytes */
/*
 * Looks up the session for tc on thread_index and tests two conditions: the
 * session state differs from SESSION_STATE_READY, and data_len exceeds
 * svm_fifo_max_enqueue for the rx fifo.
 *
 * NOTE(review): the declaration of data_len and the values returned for
 * each condition are on lines not visible in this view.
 */
211 stream_session_no_space (transport_connection_t * tc, u32 thread_index,
214 stream_session_t *s = stream_session_get (tc->s_index, thread_index);
216 if (PREDICT_FALSE (s->session_state != SESSION_STATE_READY))
219 if (data_len > svm_fifo_max_enqueue (s->server_rx_fifo))
/*
 * Returns svm_fifo_max_dequeue for the tx fifo of the session attached to
 * tc. A session whose state is not SESSION_STATE_READY takes a separate
 * path first; NOTE(review): the value returned on that path is on a line
 * not visible in this view.
 */
226 stream_session_tx_fifo_max_dequeue (transport_connection_t * tc)
228 stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
229 if (s->session_state != SESSION_STATE_READY)
231 return svm_fifo_max_dequeue (s->server_tx_fifo);
/*
 * Peeks into the tx fifo of the session attached to tc: forwards offset,
 * max_bytes and buffer to svm_fifo_peek and returns its result.
 */
235 stream_session_peek_bytes (transport_connection_t * tc, u8 * buffer,
236 u32 offset, u32 max_bytes)
238 stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
239 return svm_fifo_peek (s->server_tx_fifo, offset, max_bytes, buffer);
/*
 * Forwards max_bytes to svm_fifo_dequeue_drop on the tx fifo of the session
 * attached to tc and returns its result.
 */
243 stream_session_dequeue_drop (transport_connection_t * tc, u32 max_bytes)
245 stream_session_t *s = stream_session_get (tc->s_index, tc->thread_index);
246 return svm_fifo_dequeue_drop (s->server_tx_fifo, max_bytes);
250 * Notify session peer that new data has been enqueued.
252 * @param s Stream session for which the event is to be generated.
253 * @param block Flag to indicate if call should block if event queue is full.
255 * @return 0 on success or negative number if failed to send notification.
/*
 * Tells the application owning session s that its rx fifo has new data.
 *
 * Visible flow: a session in SESSION_STATE_CLOSED takes an early path; the
 * application is looked up from s->app_index; if it registered
 * builtin_server_rx_callback, that callback is invoked and its result
 * returned. Otherwise, when svm_fifo_set_event returns non-zero, a
 * FIFO_EVENT_APP_RX event is built and added to the application event queue
 * if block is set or the queue has room; the remaining path logs
 * "fifo full".
 *
 * NOTE(review): the values returned on the early, success and queue-full
 * paths are on lines not visible in this view.
 */
258 stream_session_enqueue_notify (stream_session_t * s, u8 block)
261 session_fifo_event_t evt;
262 unix_shared_memory_queue_t *q;
/* Event id counter; a function-local static, so one counter for all calls */
263 static u32 serial_number;
265 if (PREDICT_FALSE (s->session_state == SESSION_STATE_CLOSED))
268 /* Get session's server */
269 app = application_get (s->app_index);
271 /* Built-in server? Hand event to the callback... */
272 if (app->cb_fns.builtin_server_rx_callback)
273 return app->cb_fns.builtin_server_rx_callback (s);
275 /* If no event, send one */
276 if (svm_fifo_set_event (s->server_rx_fifo))
278 /* Fabricate event */
279 evt.fifo = s->server_rx_fifo;
280 evt.event_type = FIFO_EVENT_APP_RX;
281 evt.event_id = serial_number++;
283 /* Add event to server's event queue */
284 q = app->event_queue;
286 /* Based on request block (or not) for lack of space */
287 if (block || PREDICT_TRUE (q->cursize < q->maxsize))
/* NOTE(review): session_send_session_evt_to_thread passes 1 here with the
 * same annotation; confirm which value of this argument actually waits. */
288 unix_shared_memory_queue_add (app->event_queue, (u8 *) & evt,
289 0 /* do wait for mutex */ );
292 clib_warning ("fifo full");
/* NOTE(review): evt.event_id is read below, while evt is only filled in
 * after the svm_fifo_set_event test above; confirm it is always set before
 * this point. */
298 SESSION_EVT_DBG(SESSION_EVT_ENQ, s, ({
299 ed->data[0] = evt.event_id;
300 ed->data[1] = svm_fifo_max_dequeue (s->server_rx_fifo);
308 * Flushes queue of sessions that are to be notified of new data
311 * @param thread_index Thread index for which the flush is to be performed.
312 * @return 0 on success or a positive number indicating the number of
313 * failures due to API queue being full.
/*
 * Walks the vector of session indices queued for notification on
 * thread_index and calls stream_session_enqueue_notify (non-blocking) for
 * each session. The vector length is then reset, the vector is stored back
 * in its per-thread slot, and the per-thread enqueue epoch is incremented.
 *
 * NOTE(review): the body of the if on the notify call (what is done when a
 * notification fails) and the return statement are on lines not visible in
 * this view.
 */
316 session_manager_flush_enqueue_events (u32 thread_index)
318 session_manager_main_t *smm = &session_manager_main;
319 u32 *session_indices_to_enqueue;
322 session_indices_to_enqueue =
323 smm->session_indices_to_enqueue_by_thread[thread_index];
325 for (i = 0; i < vec_len (session_indices_to_enqueue); i++)
327 stream_session_t *s0;
330 s0 = stream_session_get (session_indices_to_enqueue[i], thread_index);
331 if (stream_session_enqueue_notify (s0, 0 /* don't block */ ))
/* Empty the vector for the next round and store it back */
337 vec_reset_length (session_indices_to_enqueue);
339 smm->session_indices_to_enqueue_by_thread[thread_index] =
340 session_indices_to_enqueue;
342 /* Increment enqueue epoch for next round */
343 smm->current_enqueue_epoch[thread_index]++;
349 * Init fifo tail and head pointers
351 * Useful if transport uses absolute offsets for tracking ooo segments.
/*
 * Looks up the session attached to tc and passes rx_pointer to
 * svm_fifo_init_pointers for its rx fifo and tx_pointer for its tx fifo.
 */
354 stream_session_init_fifos_pointers (transport_connection_t * tc,
355 u32 rx_pointer, u32 tx_pointer)
358 s = stream_session_get (tc->s_index, tc->thread_index);
359 svm_fifo_init_pointers (s->server_rx_fifo, rx_pointer);
360 svm_fifo_init_pointers (s->server_tx_fifo, tx_pointer);
/*
 * Reports the outcome of a connect to the application that requested it.
 *
 * Visible flow: the half-open table is searched with the local/remote
 * address, local/remote port and transport protocol of tc; a miss logs a
 * warning. The application is taken from the upper 32 bits of the stored
 * handle and tc->s_index is used as the api context. A session is created
 * with stream_session_create_i using the application connect segment
 * manager, the application session_connected_callback is invoked, the new
 * session is either disconnected (after "failed to notify app" is logged)
 * or moved to SESSION_STATE_READY, and the half-open entry for tc is
 * removed.
 *
 * NOTE(review): the tests on is_fail around session creation, disconnect
 * and the READY transition, and the handling of a failed create, are on
 * lines not visible in this view.
 */
364 stream_session_connect_notify (transport_connection_t * tc, u8 is_fail)
367 stream_session_t *new_s = 0;
372 handle = stream_session_half_open_lookup_handle (&tc->lcl_ip, &tc->rmt_ip,
373 tc->lcl_port, tc->rmt_port,
374 tc->transport_proto);
375 if (handle == HALF_OPEN_LOOKUP_INVALID_VALUE)
377 clib_warning ("This can't be good!");
381 /* Get the app's index from the handle we stored when opening connection */
382 app = application_get (handle >> 32);
383 api_context = tc->s_index;
387 segment_manager_t *sm;
389 sm = application_get_connect_segment_manager (app);
/* Whether fifos are allocated depends on application_is_proxy (app) */
390 alloc_fifos = application_is_proxy (app);
391 /* Create new session (svm segments are allocated if needed) */
392 if (stream_session_create_i (sm, tc, alloc_fifos, &new_s))
398 new_s->app_index = app->index;
401 /* Notify client application */
402 if (app->cb_fns.session_connected_callback (app->index, api_context, new_s,
405 clib_warning ("failed to notify app");
407 stream_session_disconnect (new_s);
412 new_s->session_state = SESSION_STATE_READY;
415 /* Cleanup session lookup */
416 stream_session_half_open_table_del (tc);
/*
 * Looks up the session attached to tc, resolves the owning application from
 * s->app_index and invokes its session_accept_callback with the session.
 */
422 stream_session_accept_notify (transport_connection_t * tc)
424 application_t *server;
427 s = stream_session_get (tc->s_index, tc->thread_index);
428 server = application_get (s->app_index);
429 server->cb_fns.session_accept_callback (s);
433 * Notification from transport that connection is being closed.
435 * A disconnect is sent to application but state is not removed. Once
436 * disconnect is acknowledged by application, session disconnect is called.
437 * Ultimately this leads to close being called on transport (passive close).
/*
 * Looks up the session attached to tc, resolves the owning application from
 * s->app_index and invokes its session_disconnect_callback with the session.
 * No session state is changed or released in the visible code.
 */
440 stream_session_disconnect_notify (transport_connection_t * tc)
442 application_t *server;
445 s = stream_session_get (tc->s_index, tc->thread_index);
446 server = application_get (s->app_index);
447 server->cb_fns.session_disconnect_callback (s);
451 * Cleans up session and associated app if needed.
/*
 * Releases session s: removes it from the lookup table (a non-zero result
 * is logged), hands its fifos to segment_manager_dealloc_fifos and returns
 * the element to the session pool of s->thread_index.
 */
454 stream_session_delete (stream_session_t * s)
456 session_manager_main_t *smm = vnet_get_session_manager_main ();
459 /* Delete from the main lookup table. */
460 if ((rv = stream_session_table_del (s)))
461 clib_warning ("hash delete error, rv %d", rv);
463 /* Cleanup fifo segments */
464 segment_manager_dealloc_fifos (s->svm_segment_index, s->server_rx_fifo,
467 pool_put (smm->sessions[s->thread_index], s);
/* Overwrite the released element with 0xFA bytes. NOTE(review): a guard
 * for this statement may sit on a line not visible in this view. */
469 memset (s, 0xFA, sizeof (*s));
473 * Notification from transport that connection is being deleted
475 * This should be called only on previously fully established sessions. For
476 * instance failed connects should call stream_session_connect_notify and
477 * indicate that the connect has failed.
/*
 * Deletes the session attached to tc through stream_session_delete. The
 * lookup uses stream_session_get_if_valid; NOTE(review): the check on its
 * result is on a line not visible in this view.
 */
480 stream_session_delete_notify (transport_connection_t * tc)
484 /* App might've been removed already */
485 s = stream_session_get_if_valid (tc->s_index, tc->thread_index);
490 stream_session_delete (s);
494 * Notify application that connection has been reset.
/*
 * Looks up the session attached to tc, resolves the owning application from
 * s->app_index and invokes its session_reset_callback with the session.
 */
497 stream_session_reset_notify (transport_connection_t * tc)
501 s = stream_session_get (tc->s_index, tc->thread_index);
503 app = application_get (s->app_index);
504 app->cb_fns.session_reset_callback (s);
508 * Accept a stream session. Optionally ping the server by callback.
/*
 * Creates a session for a connection accepted on listener listener_index.
 *
 * The listener session identifies the owning application (server) and,
 * through application_get_listen_segment_manager, the segment manager given
 * to stream_session_create_i (called with alloc_fifos set to 1). The new
 * session records the server index and the listener index, is placed in
 * SESSION_STATE_ACCEPTING, and the server session_accept_callback is
 * called with it.
 *
 * NOTE(review): the remaining parameters (sst is used below), the handling
 * of a create failure, any condition on the callback and the return values
 * are on lines not visible in this view.
 */
511 stream_session_accept (transport_connection_t * tc, u32 listener_index,
514 application_t *server;
515 stream_session_t *s, *listener;
516 segment_manager_t *sm;
520 /* Find the server */
521 listener = listen_session_get (sst, listener_index);
522 server = application_get (listener->app_index);
524 sm = application_get_listen_segment_manager (server, listener);
525 if ((rv = stream_session_create_i (sm, tc, 1, &s)))
528 s->app_index = server->index;
529 s->listener_index = listener_index;
530 s->session_state = SESSION_STATE_ACCEPTING;
532 /* Shoulder-tap the server */
535 server->cb_fns.session_accept_callback (s);
542 * Ask transport to open connection to remote transport endpoint.
544 * Stores handle for matching request with reply since the call can be
545 * asynchronous. For instance, for TCP the 3-way handshake must complete
546 * before reply comes. Session is only created once connection is established.
548 * @param app_index Index of the application requesting the connect
549 * @param st Session type requested.
550 * @param rmt Remote transport endpoint
551 * @param res Resulting transport connection.
/*
 * Asks the transport for session type st to open a connection to rmt.
 *
 * The value returned by the transport open function is cast to u32 and
 * used to fetch the half-open connection tc. A 64-bit handle holding
 * app_index in the upper 32 bits, OR-ed with tc->c_index, is stored in the
 * half-open table for tc. A failure path logs a warning and returns
 * VNET_API_ERROR_SESSION_CONNECT_FAIL.
 *
 * NOTE(review): the failure test on rv, the assignment to res and the
 * success return are on lines not visible in this view.
 */
554 stream_session_open (u32 app_index, session_type_t st,
555 transport_endpoint_t * rmt,
556 transport_connection_t ** res)
558 transport_connection_t *tc;
562 rv = tp_vfts[st].open (rmt);
565 clib_warning ("Transport failed to open connection.");
566 return VNET_API_ERROR_SESSION_CONNECT_FAIL;
569 tc = tp_vfts[st].get_half_open ((u32) rv);
571 /* Save app and tc index. The latter is needed to help establish the
572 * connection while the former is needed when the connect notify comes
573 * and we have to notify the external app */
574 handle = (((u64) app_index) << 32) | (u64) tc->c_index;
576 /* Add to the half-open lookup table */
577 stream_session_half_open_table_add (tc, handle);
585 * Ask transport to listen on local transport endpoint.
587 * @param s Session for which listen will be called. Note that unlike
588 * established sessions, listen sessions are not associated to a
590 * @param tep Local endpoint to be listened on.
/*
 * Binds listen session s to local endpoint tep through the transport bind
 * function for s->session_type. The returned connection index is compared
 * against (u32) ~0, stored in s->connection_index, and the listener
 * transport connection is added to the lookup table with s->session_index
 * as its value.
 *
 * NOTE(review): the action taken when bind returns (u32) ~0 or when
 * get_listener yields no connection, and the return values, are on lines
 * not visible in this view.
 */
593 stream_session_listen (stream_session_t * s, transport_endpoint_t * tep)
595 transport_connection_t *tc;
598 /* Transport bind/listen */
599 tci = tp_vfts[s->session_type].bind (s->session_index, tep);
601 if (tci == (u32) ~ 0)
604 /* Attach transport to session */
605 s->connection_index = tci;
606 tc = tp_vfts[s->session_type].get_listener (tci);
608 /* Weird but handle it ... */
612 /* Add to the main lookup table */
613 stream_session_table_add_for_tc (tc, s->session_index);
619 * Ask transport to stop listening on local transport endpoint.
621 * @param s Session to stop listening on. It must be in state LISTENING.
/*
 * Stops listening on session s. A session not in SESSION_STATE_LISTENING
 * logs "not a listening session". The listener transport connection is
 * fetched from s->connection_index; a failure path logs "no transport" and
 * returns VNET_API_ERROR_ADDRESS_NOT_IN_USE. Otherwise the connection is
 * removed from the lookup table and the transport unbind function is
 * called.
 *
 * NOTE(review): the test on tc and the other return statements are on
 * lines not visible in this view.
 */
624 stream_session_stop_listen (stream_session_t * s)
626 transport_connection_t *tc;
628 if (s->session_state != SESSION_STATE_LISTENING)
630 clib_warning ("not a listening session");
634 tc = tp_vfts[s->session_type].get_listener (s->connection_index);
637 clib_warning ("no transport");
638 return VNET_API_ERROR_ADDRESS_NOT_IN_USE;
641 stream_session_table_del_for_tc (tc);
642 tp_vfts[s->session_type].unbind (s->connection_index);
/*
 * Builds a session event carrying session_handle and evt_type and adds it
 * to the vpp event queue of thread_index. When the queue has room the event
 * is added with unix_shared_memory_queue_add and a non-zero result logs
 * "failed to enqueue evt"; the remaining path logs "queue full".
 *
 * NOTE(review): the declaration of thread_index and the return statements
 * are on lines not visible in this view.
 */
647 session_send_session_evt_to_thread (u64 session_handle,
648 fifo_event_type_t evt_type,
/* Event id counter; a function-local static, so one counter for all calls */
651 static u16 serial_number = 0;
652 session_fifo_event_t evt;
653 unix_shared_memory_queue_t *q;
655 /* Fabricate event */
656 evt.session_handle = session_handle;
657 evt.event_type = evt_type;
658 evt.event_id = serial_number++;
660 q = session_manager_get_vpp_event_queue (thread_index);
662 /* Based on request block (or not) for lack of space */
663 if (PREDICT_TRUE (q->cursize < q->maxsize))
/* NOTE(review): stream_session_enqueue_notify passes 0 here with the same
 * annotation; confirm which value of this argument actually waits. */
665 if (unix_shared_memory_queue_add (q, (u8 *) & evt,
666 1 /* do wait for mutex */ ))
668 clib_warning ("failed to enqueue evt");
673 clib_warning ("queue full");
679 * Disconnect session and propagate to transport. This should eventually
680 * result in a delete notification that allows us to cleanup session state.
681 * Called for both active/passive disconnects.
683 * Should be called from the session's thread.
/*
 * Marks session s as SESSION_STATE_CLOSED and calls the transport close
 * function for its session type with the connection and thread index.
 */
686 stream_session_disconnect (stream_session_t * s)
688 s->session_state = SESSION_STATE_CLOSED;
689 tp_vfts[s->session_type].close (s->connection_index, s->thread_index);
693 * Cleanup transport and session state.
695 * Notify transport of the cleanup, wait for a delete notify to actually
696 * remove the session state.
/*
 * Marks session s as SESSION_STATE_CLOSED, removes it from the lookup table
 * and calls the transport cleanup function for its session type with the
 * connection and thread index. The session element itself is not released
 * in the visible code.
 *
 * NOTE(review): the test on rv that guards the warning is on a line not
 * visible in this view.
 */
699 stream_session_cleanup (stream_session_t * s)
703 s->session_state = SESSION_STATE_CLOSED;
705 /* Delete from the main lookup table to avoid more enqueues */
706 rv = stream_session_table_del (s);
708 clib_warning ("hash delete error, rv %d", rv);
710 tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index);
714 * Allocate vpp event queue (once) per worker thread
/*
 * Creates the vpp event queue for thread_index when the matching slot in
 * smm->vpp_event_queues is still 0. The queue is created with
 * unix_shared_memory_queue_init, with elements of size
 * sizeof (session_fifo_event_t), while the data heap of am->vlib_rp is
 * pushed; the previous heap is restored afterwards. The queue length
 * defaults to 2048 and is replaced by smm->configured_event_queue_length
 * when that value is non-zero.
 *
 * NOTE(review): the thread_index parameter declaration and the first
 * argument of the queue init call are on lines not visible in this view.
 */
717 session_vpp_event_queue_allocate (session_manager_main_t * smm,
720 api_main_t *am = &api_main;
722 u32 event_queue_length = 2048;
724 if (smm->vpp_event_queues[thread_index] == 0)
726 /* Allocate event fifo in the /vpe-api shared-memory segment */
727 oldheap = svm_push_data_heap (am->vlib_rp);
729 if (smm->configured_event_queue_length)
730 event_queue_length = smm->configured_event_queue_length;
732 smm->vpp_event_queues[thread_index] =
733 unix_shared_memory_queue_init
735 sizeof (session_fifo_event_t), 0 /* consumer pid */ ,
736 0 /* (do not) send signal when queue non-empty */ );
738 svm_pop_heap (oldheap);
/*
 * Maps a transport protocol and an IPv4 flag to a session type. Visible
 * results are SESSION_TYPE_IP4_TCP and SESSION_TYPE_IP6_TCP after the
 * TRANSPORT_PROTO_TCP test, SESSION_TYPE_IP4_UDP and SESSION_TYPE_IP6_UDP,
 * and SESSION_N_TYPES as the final return.
 *
 * NOTE(review): the tests on is_ip4 and the condition selecting the UDP
 * results are on lines not visible in this view.
 */
743 session_type_from_proto_and_ip (transport_proto_t proto, u8 is_ip4)
745 if (proto == TRANSPORT_PROTO_TCP)
748 return SESSION_TYPE_IP4_TCP;
750 return SESSION_TYPE_IP6_TCP;
755 return SESSION_TYPE_IP4_UDP;
757 return SESSION_TYPE_IP6_UDP;
760 return SESSION_N_TYPES;
/*
 * Sets up the session layer state when the layer is enabled.
 *
 * Visible steps: compute num_threads as the main thread plus
 * vtm->n_threads, call svm_fifo_segment_init with a base address and a
 * timeout, size the per-thread vectors, create empty per-thread event
 * vectors, allocate one vpp event queue per thread, preallocate session
 * pool elements, call session_lookup_init and enable the TCP transport.
 *
 * NOTE(review): the condition guarding the "n_thread_stacks not set" error,
 * the divisor used for preallocated_sessions_per_worker, any update of
 * is_enabled and the final return are on lines not visible in this view.
 */
763 static clib_error_t *
764 session_manager_main_enable (vlib_main_t * vm)
766 session_manager_main_t *smm = &session_manager_main;
767 vlib_thread_main_t *vtm = vlib_get_thread_main ();
769 u32 preallocated_sessions_per_worker;
772 num_threads = 1 /* main thread */ + vtm->n_threads;
775 return clib_error_return (0, "n_thread_stacks not set");
777 /* $$$ config parameters */
778 svm_fifo_segment_init (0x200000000ULL /* first segment base VA */ ,
779 20 /* timeout in seconds */ );
781 /* configure per-thread ** vectors */
782 vec_validate (smm->sessions, num_threads - 1);
783 vec_validate (smm->session_indices_to_enqueue_by_thread, num_threads - 1);
784 vec_validate (smm->tx_buffers, num_threads - 1);
785 vec_validate (smm->pending_event_vector, num_threads - 1);
786 vec_validate (smm->free_event_vector, num_threads - 1);
787 vec_validate (smm->current_enqueue_epoch, num_threads - 1);
788 vec_validate (smm->vpp_event_queues, num_threads - 1);
790 for (i = 0; i < num_threads; i++)
/* Validate one element, then set the length to 0: each per-thread event
 * vector ends up with length 0 */
792 vec_validate (smm->free_event_vector[i], 0);
793 _vec_len (smm->free_event_vector[i]) = 0;
794 vec_validate (smm->pending_event_vector[i], 0);
795 _vec_len (smm->pending_event_vector[i]) = 0;
799 vec_validate (smm->last_event_poll_by_thread, num_threads - 1);
802 /* Allocate vpp event queues */
803 for (i = 0; i < vec_len (smm->vpp_event_queues); i++)
804 session_vpp_event_queue_allocate (smm, i);
806 /* Preallocate sessions */
/* With only the main thread, all preallocated sessions go to pool 0.
 * Elements are taken from the pool and then put back by index, presumably
 * to grow the pool up front - confirm. */
807 if (num_threads == 1)
809 for (i = 0; i < smm->preallocated_sessions; i++)
811 stream_session_t *ss __attribute__ ((unused));
812 pool_get_aligned (smm->sessions[0], ss, CLIB_CACHE_LINE_BYTES);
815 for (i = 0; i < smm->preallocated_sessions; i++)
816 pool_put_index (smm->sessions[0], i);
/* Otherwise the budget is divided and applied to pools 1 .. num_threads - 1;
 * pool 0 is not touched by the loop below */
821 preallocated_sessions_per_worker = smm->preallocated_sessions /
824 for (j = 1; j < num_threads; j++)
826 for (i = 0; i < preallocated_sessions_per_worker; i++)
828 stream_session_t *ss __attribute__ ((unused));
829 pool_get_aligned (smm->sessions[j], ss, CLIB_CACHE_LINE_BYTES);
831 for (i = 0; i < preallocated_sessions_per_worker; i++)
832 pool_put_index (smm->sessions[j], i);
836 session_lookup_init ();
840 /* Enable TCP transport */
841 vnet_tcp_enable_disable (vm, 1);
/*
 * Sets the state of session_queue_node on every vlib main:
 * VLIB_NODE_STATE_POLLING when is_en is non-zero, otherwise
 * VLIB_NODE_STATE_DISABLED.
 */
847 session_node_enable_disable (u8 is_en)
849 u8 state = is_en ? VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_DISABLED;
851 foreach_vlib_main (({
852 vlib_node_set_state (this_vlib_main, session_queue_node.index,
/*
 * Turns the session layer on or off. Visible statements: a test of
 * session_manager_main.is_enabled, session_node_enable_disable (is_en),
 * a return of session_manager_main_enable (vm), and clearing is_enabled
 * followed by session_node_enable_disable (is_en).
 *
 * NOTE(review): the branch structure on is_en and the other return
 * statements are on lines not visible in this view.
 */
859 vnet_session_enable_disable (vlib_main_t * vm, u8 is_en)
863 if (session_manager_main.is_enabled)
866 session_node_enable_disable (is_en);
868 return session_manager_main_enable (vm);
872 session_manager_main.is_enabled = 0;
873 session_node_enable_disable (is_en);
/*
 * Init function registered through VLIB_INIT_FUNCTION below. It takes the
 * address of the global session manager state; NOTE(review): the statements
 * that use smm and the return value are on lines not visible in this view.
 */
880 session_manager_main_init (vlib_main_t * vm)
882 session_manager_main_t *smm = &session_manager_main;
887 VLIB_INIT_FUNCTION (session_manager_main_init);
/*
 * Parses the "session" configuration section; registered through
 * VLIB_CONFIG_FUNCTION at the end of this block.
 *
 * Keywords recognized in the visible code:
 *   event-queue-length N
 *   preallocated-sessions N
 *   v4-session-table-buckets N, v4-halfopen-table-buckets N
 *   v6-session-table-buckets N, v6-halfopen-table-buckets N
 *   v4-session-table-memory SIZE, v4-halfopen-table-memory SIZE
 *   v6-session-table-memory SIZE, v6-halfopen-table-memory SIZE
 * Memory sizes of 0x100000000 or more are rejected with an error, and
 * input matching no keyword returns an "unknown input" error.
 *
 * NOTE(review): the lower bound applied to event-queue-length, the
 * declarations of nitems and tmp and the success return are on lines not
 * visible in this view.
 */
889 static clib_error_t *
890 session_config_fn (vlib_main_t * vm, unformat_input_t * input)
892 session_manager_main_t *smm = &session_manager_main;
896 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
898 if (unformat (input, "event-queue-length %d", &nitems))
901 smm->configured_event_queue_length = nitems;
903 clib_warning ("event queue length %d too small, ignored", nitems);
905 else if (unformat (input, "preallocated-sessions %d",
906 &smm->preallocated_sessions))
908 else if (unformat (input, "v4-session-table-buckets %d",
909 &smm->configured_v4_session_table_buckets))
911 else if (unformat (input, "v4-halfopen-table-buckets %d",
912 &smm->configured_v4_halfopen_table_buckets))
914 else if (unformat (input, "v6-session-table-buckets %d",
915 &smm->configured_v6_session_table_buckets))
917 else if (unformat (input, "v6-halfopen-table-buckets %d",
918 &smm->configured_v6_halfopen_table_buckets))
920 else if (unformat (input, "v4-session-table-memory %U",
921 unformat_memory_size, &tmp))
/* Each memory size is accepted only when it is below 0x100000000 (2^32);
 * the same check is repeated for all four memory keywords */
923 if (tmp >= 0x100000000)
924 return clib_error_return (0, "memory size %llx (%lld) too large",
926 smm->configured_v4_session_table_memory = tmp;
928 else if (unformat (input, "v4-halfopen-table-memory %U",
929 unformat_memory_size, &tmp))
931 if (tmp >= 0x100000000)
932 return clib_error_return (0, "memory size %llx (%lld) too large",
934 smm->configured_v4_halfopen_table_memory = tmp;
936 else if (unformat (input, "v6-session-table-memory %U",
937 unformat_memory_size, &tmp))
939 if (tmp >= 0x100000000)
940 return clib_error_return (0, "memory size %llx (%lld) too large",
942 smm->configured_v6_session_table_memory = tmp;
944 else if (unformat (input, "v6-halfopen-table-memory %U",
945 unformat_memory_size, &tmp))
947 if (tmp >= 0x100000000)
948 return clib_error_return (0, "memory size %llx (%lld) too large",
950 smm->configured_v6_halfopen_table_memory = tmp;
953 return clib_error_return (0, "unknown input `%U'",
954 format_unformat_error, input);
959 VLIB_CONFIG_FUNCTION (session_config_fn, "session");
962 * fd.io coding-style-patch-verification: ON
965 * eval: (c-set-style "gnu")