X-Git-Url: https://gerrit.fd.io/r/gitweb?a=blobdiff_plain;f=src%2Fvnet%2Fsession%2Fsession_node.c;h=1471696554788b2ee9924a59708394df36b2aa15;hb=7fb0fe1;hp=78116178d2b03545414a68a8132cfe294e445f42;hpb=6a9b68b268f9dc87a221fac0ec9b0b67c16106ca;p=vpp.git diff --git a/src/vnet/session/session_node.c b/src/vnet/session/session_node.c index 78116178d2b..14716965547 100644 --- a/src/vnet/session/session_node.c +++ b/src/vnet/session/session_node.c @@ -16,11 +16,11 @@ #include #include #include -#include #include +#include #include #include -#include +#include vlib_node_registration_t session_queue_node; @@ -64,20 +64,13 @@ static char *session_queue_error_strings[] = { #undef _ }; -static u32 session_type_to_next[] = { - SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT, - SESSION_QUEUE_NEXT_IP4_LOOKUP, - SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT, - SESSION_QUEUE_NEXT_IP6_LOOKUP, -}; - always_inline void session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm, u8 thread_index, svm_fifo_t * fifo, vlib_buffer_t * b0, u32 bi0, u8 n_bufs_per_seg, u32 left_from_seg, u32 * left_to_snd0, u16 * n_bufs, u32 * tx_offset, u16 deq_per_buf, - u8 peek_data) + u8 peek_data, transport_tx_fn_type_t tx_type) { vlib_buffer_t *chain_b0, *prev_b0; u32 chain_bi0, to_deq; @@ -109,7 +102,23 @@ session_tx_fifo_chain_tail (session_manager_main_t * smm, vlib_main_t * vm, } else { - n_bytes_read = svm_fifo_dequeue_nowait (fifo, len_to_deq0, data0); + if (tx_type == TRANSPORT_TX_DGRAM) + { + session_dgram_hdr_t *hdr; + u16 deq_now; + hdr = (session_dgram_hdr_t *) svm_fifo_head (fifo); + deq_now = clib_min (hdr->data_length - hdr->data_offset, + len_to_deq0); + n_bytes_read = svm_fifo_peek (fifo, hdr->data_offset, deq_now, + data0); + ASSERT (n_bytes_read > 0); + + hdr->data_offset += n_bytes_read; + if (hdr->data_offset == hdr->data_length) + svm_fifo_dequeue_drop (fifo, hdr->data_length); + } + else + n_bytes_read = svm_fifo_dequeue_nowait (fifo, len_to_deq0, data0); } ASSERT (n_bytes_read == len_to_deq0); chain_b0->current_length = n_bytes_read; @@ -143,6 +152,7 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, u32 n_bufs_per_evt, n_frames_per_evt, n_bufs_per_frame; transport_connection_t *tc0; transport_proto_vft_t *transport_vft; + transport_proto_t tp; u32 next_index, next0, *to_next, n_left_to_next, bi0; vlib_buffer_t *b0; u32 tx_offset = 0, max_dequeue0, n_bytes_per_seg, left_for_seg; @@ -150,12 +160,36 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, u8 *data0; int i, n_bytes_read; u32 n_bytes_per_buf, deq_per_buf, deq_per_first_buf; - u32 buffers_allocated, buffers_allocated_this_call; - - next_index = next0 = session_type_to_next[s0->session_type]; + u32 bufs_alloc, bufs_now; + session_dgram_hdr_t hdr; - transport_vft = transport_protocol_get_vft (s0->session_type); - tc0 = transport_vft->get_connection (s0->connection_index, thread_index); + next_index = next0 = smm->session_type_to_next[s0->session_type]; + tp = session_get_transport_proto (s0); + transport_vft = transport_protocol_get_vft (tp); + if (peek_data) + { + if (PREDICT_FALSE (s0->session_state < SESSION_STATE_READY)) + { + /* Can retransmit for closed sessions but can't send new data if + * session is not ready or closed */ + vec_add1 (smm->pending_event_vector[thread_index], *e0); + return 0; + } + tc0 = + transport_vft->get_connection (s0->connection_index, thread_index); + } + else + { + if (s0->session_state == SESSION_STATE_LISTENING) + tc0 = transport_vft->get_listener (s0->connection_index); + else + { + if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED)) + return 0; + tc0 = transport_vft->get_connection (s0->connection_index, + thread_index); + } + } /* Make sure we have space to send and there's something to dequeue */ snd_mss0 = transport_vft->send_mss (tc0); @@ -173,20 +207,26 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* Check how much we can pull. */ max_dequeue0 = svm_fifo_max_dequeue (s0->server_tx_fifo); - if (peek_data) { /* Offset in rx fifo from where to peek data */ tx_offset = transport_vft->tx_fifo_offset (tc0); if (PREDICT_FALSE (tx_offset >= max_dequeue0)) - max_dequeue0 = 0; - else - max_dequeue0 -= tx_offset; + return 0; + max_dequeue0 -= tx_offset; } - - /* Nothing to read return */ - if (max_dequeue0 == 0) - return 0; + else + { + if (transport_vft->tx_type == TRANSPORT_TX_DGRAM) + { + if (max_dequeue0 < sizeof (hdr)) + return 0; + svm_fifo_peek (s0->server_tx_fifo, 0, sizeof (hdr), (u8 *) & hdr); + ASSERT (hdr.data_length > hdr.data_offset); + max_dequeue0 = hdr.data_length - hdr.data_offset; + } + } + ASSERT (max_dequeue0 > 0); /* Ensure we're not writing more than transport window allows */ if (max_dequeue0 < snd_space0) @@ -223,23 +263,19 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, { vec_validate (smm->tx_buffers[thread_index], n_bufs + n_bufs_per_frame - 1); - buffers_allocated = 0; + bufs_alloc = 0; do { - buffers_allocated_this_call = vlib_buffer_alloc (vm, - &smm->tx_buffers - [thread_index] - [n_bufs + - buffers_allocated], - n_bufs_per_frame - - - buffers_allocated); - buffers_allocated += buffers_allocated_this_call; + bufs_now = + vlib_buffer_alloc (vm, + &smm->tx_buffers[thread_index][n_bufs + + bufs_alloc], + n_bufs_per_frame - bufs_alloc); + bufs_alloc += bufs_now; } - while (buffers_allocated_this_call > 0 - && ((buffers_allocated + n_bufs < n_bufs_per_frame))); + while (bufs_now > 0 && ((bufs_alloc + n_bufs < n_bufs_per_frame))); - n_bufs += buffers_allocated; + n_bufs += bufs_alloc; _vec_len (smm->tx_buffers[thread_index]) = n_bufs; if (PREDICT_FALSE (n_bufs < n_bufs_per_frame)) @@ -257,6 +293,15 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, * Handle first buffer in chain separately */ + len_to_deq0 = clib_min (left_to_snd0, deq_per_first_buf); + if (left_to_snd0 > len_to_deq0 && n_left_to_next > 1) + { + vlib_buffer_t *pb; + u32 pbi = smm->tx_buffers[thread_index][n_bufs - 2]; + pb = vlib_get_buffer (vm, pbi); + vlib_prefetch_buffer_header (pb, LOAD); + } + /* Get free buffer */ ASSERT (n_bufs >= 1); bi0 = smm->tx_buffers[thread_index][--n_bufs]; @@ -269,11 +314,10 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); b0->error = 0; - b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b0->flags = VNET_BUFFER_F_LOCALLY_ORIGINATED; b0->current_data = 0; b0->total_length_not_including_first_buffer = 0; - len_to_deq0 = clib_min (left_to_snd0, deq_per_first_buf); data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); if (peek_data) { @@ -287,14 +331,42 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, } else { - n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo, - len_to_deq0, data0); - if (n_bytes_read <= 0) - goto dequeue_fail; + if (transport_vft->tx_type == TRANSPORT_TX_DGRAM) + { + svm_fifo_t *f = s0->server_tx_fifo; + u16 deq_now; + u32 offset; + + ASSERT (hdr.data_length > hdr.data_offset); + deq_now = clib_min (hdr.data_length - hdr.data_offset, + len_to_deq0); + offset = hdr.data_offset + SESSION_CONN_HDR_LEN; + n_bytes_read = svm_fifo_peek (f, offset, deq_now, data0); + if (PREDICT_FALSE (n_bytes_read <= 0)) + goto dequeue_fail; + + if (s0->session_state == SESSION_STATE_LISTENING) + { + ip_copy (&tc0->rmt_ip, &hdr.rmt_ip, tc0->is_ip4); + tc0->rmt_port = hdr.rmt_port; + } + hdr.data_offset += n_bytes_read; + if (hdr.data_offset == hdr.data_length) + { + offset = hdr.data_length + SESSION_CONN_HDR_LEN; + svm_fifo_dequeue_drop (f, offset); + } + } + else + { + n_bytes_read = svm_fifo_dequeue_nowait (s0->server_tx_fifo, + len_to_deq0, data0); + if (n_bytes_read <= 0) + goto dequeue_fail; + } } b0->current_length = n_bytes_read; - left_to_snd0 -= n_bytes_read; *n_tx_packets = *n_tx_packets + 1; @@ -308,7 +380,8 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, s0->server_tx_fifo, b0, bi0, n_bufs_per_seg, left_for_seg, &left_to_snd0, &n_bufs, &tx_offset, - deq_per_buf, peek_data); + deq_per_buf, peek_data, + transport_vft->tx_type); } /* Ask transport to push header after current_length and @@ -325,8 +398,6 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* *INDENT-ON* */ VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b0); - if (VLIB_BUFFER_TRACE_TRAJECTORY) - b0->pre_data[1] = 3; if (PREDICT_FALSE (n_trace > 0)) { @@ -348,12 +419,18 @@ session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, /* If we couldn't dequeue all bytes mark as partially read */ if (max_len_to_snd0 < max_dequeue0) + if (svm_fifo_set_event (s0->server_tx_fifo)) + vec_add1 (smm->pending_event_vector[thread_index], *e0); + + if (!peek_data && transport_vft->tx_type == TRANSPORT_TX_DGRAM) { - /* If we don't already have new event */ - if (svm_fifo_set_event (s0->server_tx_fifo)) - { - vec_add1 (smm->pending_event_vector[thread_index], *e0); - } + /* Fix dgram pre header */ + if (max_len_to_snd0 < max_dequeue0) + svm_fifo_overwrite_head (s0->server_tx_fifo, (u8 *) & hdr, + sizeof (session_dgram_pre_hdr_t)); + /* More data needs to be read */ + else if (svm_fifo_max_dequeue (s0->server_tx_fifo) > 0) + vec_add1 (smm->pending_event_vector[thread_index], *e0); } return 0; @@ -363,7 +440,6 @@ dequeue_fail: * read, return buff to free list and return */ clib_warning ("dequeue fail"); - if (svm_fifo_set_event (s0->server_tx_fifo)) { vec_add1 (smm->pending_event_vector[thread_index], *e0); @@ -396,6 +472,20 @@ session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, n_tx_pkts, 0); } +int +session_tx_fifo_dequeue_internal (vlib_main_t * vm, + vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, + int *n_tx_pkts) +{ + application_t *app; + app = application_get (s0->opaque); + svm_fifo_unset_event (s0->server_tx_fifo); + return app->cb_fns.builtin_app_tx_callback (s0); +} + always_inline stream_session_t * session_event_get_session (session_fifo_event_t * e, u8 thread_index) { @@ -413,7 +503,7 @@ dump_thread_0_event_queue (void) int i, index; i8 *headp; - unix_shared_memory_queue_t *q; + svm_queue_t *q; q = smm->vpp_event_queues[my_thread_index]; index = q->head; @@ -493,7 +583,7 @@ u8 session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e) { session_manager_main_t *smm = vnet_get_session_manager_main (); - unix_shared_memory_queue_t *q; + svm_queue_t *q; session_fifo_event_t *pending_event_vector, *evt; int i, index, found = 0; i8 *headp; @@ -512,7 +602,7 @@ session_node_lookup_fifo_event (svm_fifo_t * f, session_fifo_event_t * e) clib_memcpy (e, headp, q->elsize); found = session_node_cmp_event (e, f); if (found) - break; + return 1; if (++index == q->maxsize) index = 0; } @@ -540,7 +630,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, session_fifo_event_t *my_pending_event_vector, *pending_disconnects, *e; session_fifo_event_t *my_fifo_events; u32 n_to_dequeue, n_events; - unix_shared_memory_queue_t *q; + svm_queue_t *q; application_t *app; int n_tx_packets = 0; u32 my_thread_index = vm->thread_index; @@ -551,9 +641,9 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, SESSION_EVT_DBG (SESSION_EVT_POLL_GAP_TRACK, smm, my_thread_index); /* - * Update TCP time + * Update transport time */ - tcp_update_time (now, my_thread_index); + transport_update_time (now, my_thread_index); /* * Get vpp queue events @@ -593,7 +683,7 @@ session_queue_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, for (i = 0; i < n_to_dequeue; i++) { vec_add2 (my_fifo_events, e, 1); - unix_shared_memory_queue_sub_raw (q, (u8 *) e); + svm_queue_sub_raw (q, (u8 *) e); } /* The other side of the connection is not polling */ @@ -627,10 +717,6 @@ skip_dequeue: clib_warning ("It's dead, Jim!"); continue; } - /* Can retransmit for closed sessions but can't do anything if - * session is not ready or closed */ - if (PREDICT_FALSE (s0->session_state < SESSION_STATE_READY)) - continue; /* Spray packets in per session type frames, since they go to * different nodes */ rv = (smm->session_tx_fns[s0->session_type]) (vm, node, smm, e0, s0, @@ -653,7 +739,7 @@ skip_dequeue: continue; } s0 = session_get_from_handle (e0->session_handle); - stream_session_disconnect (s0); + stream_session_disconnect_transport (s0); break; case FIFO_EVENT_BUILTIN_RX: s0 = session_event_get_session (e0, my_thread_index); @@ -661,7 +747,7 @@ skip_dequeue: continue; svm_fifo_unset_event (s0->server_rx_fifo); app = application_get (s0->app_index); - app->cb_fns.builtin_server_rx_callback (s0); + app->cb_fns.builtin_app_rx_callback (s0); break; case FIFO_EVENT_RPC: fp = e0->rpc_args.fp; @@ -693,19 +779,29 @@ VLIB_REGISTER_NODE (session_queue_node) = .type = VLIB_NODE_TYPE_INPUT, .n_errors = ARRAY_LEN (session_queue_error_strings), .error_strings = session_queue_error_strings, - .n_next_nodes = SESSION_QUEUE_N_NEXT, .state = VLIB_NODE_STATE_DISABLED, - .next_nodes = - { - [SESSION_QUEUE_NEXT_DROP] = "error-drop", - [SESSION_QUEUE_NEXT_IP4_LOOKUP] = "ip4-lookup", - [SESSION_QUEUE_NEXT_IP6_LOOKUP] = "ip6-lookup", - [SESSION_QUEUE_NEXT_TCP_IP4_OUTPUT] = "tcp4-output", - [SESSION_QUEUE_NEXT_TCP_IP6_OUTPUT] = "tcp6-output", - }, }; /* *INDENT-ON* */ +static clib_error_t * +session_queue_exit (vlib_main_t * vm) +{ + if (vec_len (vlib_mains) < 2) + return 0; + + /* + * Shut off (especially) worker-thread session nodes. + * Otherwise, vpp can crash as the main thread unmaps the + * API segment. + */ + vlib_worker_thread_barrier_sync (vm); + session_node_enable_disable (0 /* is_enable */ ); + vlib_worker_thread_barrier_release (vm); + return 0; +} + +VLIB_MAIN_LOOP_EXIT_FUNCTION (session_queue_exit); + /* * fd.io coding-style-patch-verification: ON *