From d79b41e993981df80245b0e6d90eb691bdaae648 Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Sat, 4 Mar 2017 05:37:52 -0800 Subject: [PATCH] VPP-659 TCP improvements - builtin test echo server - fix SYN-ACK retransmit canceling - avoid sending spurious ACK if in LAST_ACK - improved client dummy test app - renamed tx fifo dequeuing and sending functions to avoid confusion - improved RST handling Change-Id: Ia14aad3df319540dcf6e6a4e18a9f8d423a4b83b Signed-off-by: Florin Coras Signed-off-by: Dave Barach --- src/scripts/vnet/uri/{tcp_server => afp_setup.cli} | 0 src/scripts/vnet/uri/dummy_app.py | 26 +++++- src/scripts/vnet/uri/tap_setup.cli | 5 ++ src/uri/uri_tcp_test.c | 7 +- src/vnet/session/application.c | 16 ++++ src/vnet/session/application.h | 3 +- src/vnet/session/application_interface.c | 5 +- src/vnet/session/application_interface.h | 4 +- src/vnet/session/node.c | 56 +++++++------ src/vnet/session/session.c | 38 ++++++--- src/vnet/session/session.h | 28 +++---- src/vnet/session/session_api.c | 50 +++++++++++- src/vnet/session/transport.h | 66 +++++++-------- src/vnet/tcp/builtin_server.c | 94 ++++++++++++++++++++-- src/vnet/tcp/tcp.c | 63 ++++++++++++--- src/vnet/tcp/tcp.h | 14 +++- src/vnet/tcp/tcp_error.def | 11 +-- src/vnet/tcp/tcp_input.c | 63 ++++++++++----- src/vnet/tcp/tcp_output.c | 47 ++++++++--- src/vnet/udp/builtin_server.c | 2 +- src/vnet/udp/udp_input.c | 14 ++-- src/vnet/unix/tapcli.c | 3 +- 22 files changed, 454 insertions(+), 161 deletions(-) rename src/scripts/vnet/uri/{tcp_server => afp_setup.cli} (100%) create mode 100644 src/scripts/vnet/uri/tap_setup.cli diff --git a/src/scripts/vnet/uri/tcp_server b/src/scripts/vnet/uri/afp_setup.cli similarity index 100% rename from src/scripts/vnet/uri/tcp_server rename to src/scripts/vnet/uri/afp_setup.cli diff --git a/src/scripts/vnet/uri/dummy_app.py b/src/scripts/vnet/uri/dummy_app.py index b80fbb28163..50333923d6e 100644 --- a/src/scripts/vnet/uri/dummy_app.py +++ 
b/src/scripts/vnet/uri/dummy_app.py @@ -2,7 +2,7 @@ import socket import sys -import bitstring +import time # action can be reflect or drop action = "drop" @@ -22,6 +22,7 @@ def handle_connection (connection, client_address): def run_server(ip, port): print("Starting server {}:{}".format(repr(ip), repr(port))) sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) server_address = (ip, int(port)) sock.bind(server_address) sock.listen(1) @@ -39,12 +40,31 @@ def prepare_data(): def run_client(ip, port): print("Starting client {}:{}".format(repr(ip), repr(port))) sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - server_address = ("6.0.1.1", 1234) + server_address = (ip, port) sock.connect(server_address) data = prepare_data() + n_rcvd = 0 + n_sent = len (data) try: sock.sendall(data) + + timeout = time.time() + 2 + while n_rcvd < n_sent and time.time() < timeout: + tmp = sock.recv(1500) + tmp = bytearray (tmp) + n_read = len(tmp) + for i in range(n_read): + if (data[n_rcvd + i] != tmp[i]): + print("Difference at byte {}. 
Sent {} got {}" + .format(n_rcvd + i, data[n_rcvd + i], tmp[i])) + n_rcvd += n_read + + if (n_rcvd < n_sent or n_rcvd > n_sent): + print("Sent {} and got back {}".format(n_sent, n_rcvd)) + else: + print("Got back what we've sent!!"); + finally: sock.close() @@ -62,4 +82,4 @@ if __name__ == "__main__": if (len(sys.argv) == 5): action = sys.argv[4] - run (sys.argv[1], sys.argv[2], sys.argv[3]) + run (sys.argv[1], sys.argv[2], int(sys.argv[3])) diff --git a/src/scripts/vnet/uri/tap_setup.cli b/src/scripts/vnet/uri/tap_setup.cli new file mode 100644 index 00000000000..1d9a1b36287 --- /dev/null +++ b/src/scripts/vnet/uri/tap_setup.cli @@ -0,0 +1,5 @@ +tap connect tap0 address 6.0.1.2/24 +set int ip addr tap-0 6.0.1.1/24 +set int state tap-0 up +trace add tapcli-rx 10 +session enable diff --git a/src/uri/uri_tcp_test.c b/src/uri/uri_tcp_test.c index 6c9cf1dbac9..261fd2889d6 100644 --- a/src/uri/uri_tcp_test.c +++ b/src/uri/uri_tcp_test.c @@ -287,6 +287,7 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) session = pool_elt_at_index (utm->sessions, p[0]); hash_unset (utm->session_index_by_vpp_handles, key); pool_put (utm->sessions, session); + utm->time_to_stop = 1; } else { @@ -296,7 +297,7 @@ vl_api_reset_session_t_handler (vl_api_reset_session_t * mp) rmp = vl_msg_api_alloc (sizeof (*rmp)); memset (rmp, 0, sizeof (*rmp)); - rmp->_vl_msg_id = ntohs (VL_API_DISCONNECT_SESSION_REPLY); + rmp->_vl_msg_id = ntohs (VL_API_RESET_SESSION_REPLY); rmp->retval = rv; rmp->session_index = mp->session_index; rmp->session_thread_index = mp->session_thread_index; @@ -734,7 +735,7 @@ server_handle_fifo_event_rx (uri_tcp_test_main_t * utm, { rv = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, utm->rx_buf); } - while (rv == -2); + while (rv == -2 && !utm->time_to_stop); /* Fabricate TX event, send to vpp */ evt.fifo = tx_fifo; @@ -750,7 +751,7 @@ server_handle_fifo_event_rx (uri_tcp_test_main_t * utm, if (n_read > 0) bytes -= n_read; } - while (n_read < 0 || bytes > 0); + while 
((n_read < 0 || bytes > 0) && !utm->time_to_stop); } void diff --git a/src/vnet/session/application.c b/src/vnet/session/application.c index a542eebe43f..513e5faca9c 100644 --- a/src/vnet/session/application.c +++ b/src/vnet/session/application.c @@ -92,6 +92,19 @@ application_del (application_t * app) pool_put (app_pool, app); } +static void +application_verify_cb_fns (application_type_t type, session_cb_vft_t * cb_fns) +{ + if (type == APP_SERVER && cb_fns->session_accept_callback == 0) + clib_warning ("No accept callback function provided"); + if (type == APP_CLIENT && cb_fns->session_connected_callback == 0) + clib_warning ("No session connected callback function provided"); + if (cb_fns->session_disconnect_callback == 0) + clib_warning ("No session disconnect callback function provided"); + if (cb_fns->session_reset_callback == 0) + clib_warning ("No session reset callback function provided"); +} + application_t * application_new (application_type_t type, session_type_t sst, u32 api_client_index, u32 flags, session_cb_vft_t * cb_fns) @@ -142,6 +155,9 @@ application_new (application_type_t type, session_type_t sst, app->flags = flags; app->cb_fns = *cb_fns; + /* Check that the obvious things are properly set up */ + application_verify_cb_fns (type, cb_fns); + /* Add app to lookup by api_client_index table */ application_table_add (app); diff --git a/src/vnet/session/application.h b/src/vnet/session/application.h index 480828f7b34..a60a8b8b5f9 100644 --- a/src/vnet/session/application.h +++ b/src/vnet/session/application.h @@ -45,7 +45,8 @@ typedef struct _stream_session_cb_vft void (*session_reset_callback) (stream_session_t * s); /* Direct RX callback, for built-in servers */ - int (*builtin_server_rx_callback) (stream_session_t * session); + int (*builtin_server_rx_callback) (stream_session_t * session, + session_fifo_event_t * ep); /* Redirect connection to local server */ int (*redirect_connect_callback) (u32 api_client_index, void *mp); diff --git 
a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index 6ddfb70f609..4b30bd876a7 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -98,7 +98,7 @@ vnet_bind_i (u32 api_client_index, ip46_address_t * ip46, u16 port_host_order, if (application_lookup (api_client_index)) { - clib_warning ("Only one bind supported for now"); + clib_warning ("Only one connection supported for now"); return VNET_API_ERROR_ADDRESS_IN_USE; } @@ -364,8 +364,7 @@ vnet_connect_uri (vnet_connect_args_t * a) } int -vnet_disconnect_session (u32 client_index, u32 session_index, - u32 thread_index) +vnet_disconnect_session (u32 session_index, u32 thread_index) { stream_session_t *session; diff --git a/src/vnet/session/application_interface.h b/src/vnet/session/application_interface.h index 8d87c067841..a5f2b9a6a1a 100644 --- a/src/vnet/session/application_interface.h +++ b/src/vnet/session/application_interface.h @@ -112,9 +112,7 @@ typedef enum int vnet_bind_uri (vnet_bind_args_t *); int vnet_unbind_uri (char *uri, u32 api_client_index); int vnet_connect_uri (vnet_connect_args_t * a); -int -vnet_disconnect_session (u32 client_index, u32 session_index, - u32 thread_index); +int vnet_disconnect_session (u32 session_index, u32 thread_index); int vnet_bind (vnet_bind_args_t * a); int vnet_connect (vnet_connect_args_t * a); diff --git a/src/vnet/session/node.c b/src/vnet/session/node.c index 399077decb0..7fd7e0b7499 100644 --- a/src/vnet/session/node.c +++ b/src/vnet/session/node.c @@ -78,10 +78,11 @@ static u32 session_type_to_next[] = { }; always_inline int -session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, - session_manager_main_t * smm, session_fifo_event_t * e0, - stream_session_t * s0, u32 thread_index, int *n_tx_packets, - u8 peek_data) +session_tx_fifo_read_and_snd_i (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, + stream_session_t 
* s0, u32 thread_index, + int *n_tx_packets, u8 peek_data) { u32 n_trace = vlib_get_trace_count (vm, node); u32 left_to_snd0, max_len_to_snd0, len_to_deq0, n_bufs, snd_space0; @@ -120,7 +121,7 @@ session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, if (peek_data) { /* Offset in rx fifo from where to peek data */ - rx_offset = transport_vft->rx_fifo_offset (tc0); + rx_offset = transport_vft->tx_fifo_offset (tc0); } /* TODO check if transport is willing to send len_to_snd0 @@ -194,25 +195,27 @@ session_fifo_rx_i (vlib_main_t * vm, vlib_node_runtime_t * node, t0->server_thread_index = s0->thread_index; } + len_to_deq0 = (left_to_snd0 < snd_mss0) ? left_to_snd0 : snd_mss0; + /* *INDENT-OFF* */ if (1) { ELOG_TYPE_DECLARE (e) = { - .format = "evt-dequeue: id %d length %d", - .format_args = "i4i4", + .format = "evt-deq: id %d len %d rd %d wnd %d", + .format_args = "i4i4i4i4", }; struct { - u32 data[2]; + u32 data[4]; } *ed; ed = ELOG_DATA (&vm->elog_main, e); ed->data[0] = e0->event_id; ed->data[1] = e0->enqueue_length; + ed->data[2] = len_to_deq0; + ed->data[3] = left_to_snd0; } /* *INDENT-ON* */ - len_to_deq0 = (left_to_snd0 < snd_mss0) ? 
left_to_snd0 : snd_mss0; - /* Make room for headers */ data0 = vlib_buffer_make_headroom (b0, MAX_HDRS_LEN); @@ -276,22 +279,25 @@ dequeue_fail: } int -session_fifo_rx_peek (vlib_main_t * vm, vlib_node_runtime_t * node, - session_manager_main_t * smm, session_fifo_event_t * e0, - stream_session_t * s0, u32 thread_index, int *n_tx_pkts) +session_tx_fifo_peek_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, + int *n_tx_pkts) { - return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, - 1); + return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index, + n_tx_pkts, 1); } int -session_fifo_rx_dequeue (vlib_main_t * vm, vlib_node_runtime_t * node, - session_manager_main_t * smm, - session_fifo_event_t * e0, stream_session_t * s0, - u32 thread_index, int *n_tx_pkts) +session_tx_fifo_dequeue_and_snd (vlib_main_t * vm, vlib_node_runtime_t * node, + session_manager_main_t * smm, + session_fifo_event_t * e0, + stream_session_t * s0, u32 thread_index, + int *n_tx_pkts) { - return session_fifo_rx_i (vm, node, smm, e0, s0, thread_index, n_tx_pkts, - 0); + return session_tx_fifo_read_and_snd_i (vm, node, smm, e0, s0, thread_index, + n_tx_pkts, 0); } static uword @@ -369,12 +375,16 @@ skip_dequeue: s0 = stream_session_get_if_valid (server_session_index0, my_thread_index); - if (!s0) + + if (CLIB_DEBUG && !s0) { - clib_warning ("It's dead Jim!"); + clib_warning ("It's dead, Jim!"); continue; } + if (PREDICT_FALSE (s0->session_state == SESSION_STATE_CLOSED)) + continue; + ASSERT (s0->thread_index == my_thread_index); switch (e0->event_type) diff --git a/src/vnet/session/session.c b/src/vnet/session/session.c index b5a168ca26c..8867e794eeb 100644 --- a/src/vnet/session/session.c +++ b/src/vnet/session/session.c @@ -373,7 +373,7 @@ stream_session_lookup_transport6 (ip6_address_t * lcl, ip6_address_t * rmt, /* Finally, try half-open connections 
*/ rv = clib_bihash_search_inline_48_8 (&smm->v6_half_open_hash, &kv6); if (rv == 0) - return tp_vfts[s->session_type].get_half_open (kv6.value & 0xFFFFFFFF); + return tp_vfts[proto].get_half_open (kv6.value & 0xFFFFFFFF); return 0; } @@ -617,7 +617,10 @@ again: goto again; } else - return SESSION_ERROR_NO_SPACE; + { + clib_warning ("No space to allocate fifos!"); + return SESSION_ERROR_NO_SPACE; + } } return 0; } @@ -806,6 +809,10 @@ stream_session_enqueue_notify (stream_session_t * s, u8 block) evt.event_id = serial_number++; evt.enqueue_length = svm_fifo_max_dequeue (s->server_rx_fifo); + /* Built-in server? Hand event to the callback... */ + if (app->cb_fns.builtin_server_rx_callback) + return app->cb_fns.builtin_server_rx_callback (s, &evt); + /* Add event to server's event queue */ q = app->event_queue; @@ -1043,13 +1050,9 @@ stream_session_delete (stream_session_t * s) session_manager_main_t *smm = vnet_get_session_manager_main (); svm_fifo_segment_private_t *fifo_segment; application_t *app; - int rv; - /* delete from the main lookup table */ - rv = stream_session_table_del (smm, s); - - if (rv) - clib_warning ("hash delete error, rv %d", rv); + /* Delete from the main lookup table. */ + stream_session_table_del (smm, s); /* Cleanup fifo segments */ fifo_segment = svm_fifo_get_segment (s->server_segment_index); @@ -1197,18 +1200,30 @@ stream_session_open (u8 sst, ip46_address_t * addr, u16 port_host_byte_order, void stream_session_disconnect (stream_session_t * s) { - tp_vfts[s->session_type].close (s->connection_index, s->thread_index); s->session_state = SESSION_STATE_CLOSED; + tp_vfts[s->session_type].close (s->connection_index, s->thread_index); } /** * Cleanup transport and session state. + * + * Notify transport of the cleanup, wait for a delete notify to actually + * remove the session state. 
*/ void stream_session_cleanup (stream_session_t * s) { + session_manager_main_t *smm = &session_manager_main; + int rv; + + s->session_state = SESSION_STATE_CLOSED; + + /* Delete from the main lookup table to avoid more enqueues */ + rv = stream_session_table_del (smm, s); + if (rv) + clib_warning ("hash delete error, rv %d", rv); + tp_vfts[s->session_type].cleanup (s->connection_index, s->thread_index); - stream_session_delete (s); } void @@ -1221,7 +1236,8 @@ session_register_transport (u8 type, const transport_proto_vft_t * vft) /* If an offset function is provided, then peek instead of dequeue */ smm->session_rx_fns[type] = - (vft->rx_fifo_offset) ? session_fifo_rx_peek : session_fifo_rx_dequeue; + (vft->tx_fifo_offset) ? session_tx_fifo_peek_and_snd : + session_tx_fifo_dequeue_and_snd; } transport_proto_vft_t * diff --git a/src/vnet/session/session.h b/src/vnet/session/session.h index 46e5ce2cdd0..1b712e2e59e 100644 --- a/src/vnet/session/session.h +++ b/src/vnet/session/session.h @@ -102,33 +102,33 @@ typedef CLIB_PACKED (struct typedef struct _stream_session_t { + /** fifo pointers. Once allocated, these do not move */ + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + /** Type */ u8 session_type; /** State */ u8 session_state; + u8 thread_index; + + /** used during unbind processing */ + u8 is_deleted; + + /** To avoid n**2 "one event per frame" check */ + u8 enqueue_epoch; + /** Session index in per_thread pool */ u32 session_index; /** Transport specific */ u32 connection_index; - u8 thread_index; - /** Application specific */ u32 pid; - /** fifo pointers. 
Once allocated, these do not move */ - svm_fifo_t *server_rx_fifo; - svm_fifo_t *server_tx_fifo; - - /** To avoid n**2 "one event per frame" check */ - u8 enqueue_epoch; - - /** used during unbind processing */ - u8 is_deleted; - /** stream server pool index */ u32 app_index; @@ -162,8 +162,8 @@ typedef int session_fifo_event_t * e0, stream_session_t * s0, u32 thread_index, int *n_tx_pkts); -extern session_fifo_rx_fn session_fifo_rx_peek; -extern session_fifo_rx_fn session_fifo_rx_dequeue; +extern session_fifo_rx_fn session_tx_fifo_peek_and_snd; +extern session_fifo_rx_fn session_tx_fifo_dequeue_and_snd; struct _session_manager_main { diff --git a/src/vnet/session/session_api.c b/src/vnet/session/session_api.c index 8852fc6ee08..9c38428aa36 100644 --- a/src/vnet/session/session_api.c +++ b/src/vnet/session/session_api.c @@ -130,6 +130,27 @@ send_session_disconnect_uri_callback (stream_session_t * s) vl_msg_api_send_shmem (q, (u8 *) & mp); } +static void +send_session_reset_uri_callback (stream_session_t * s) +{ + vl_api_reset_session_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SESSION); + + mp->session_thread_index = s->thread_index; + mp->session_index = s->session_index; + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + static int send_session_connected_uri_callback (u32 api_client_index, stream_session_t * s, u8 is_fail) @@ -347,6 +368,26 @@ send_session_disconnect_callback (stream_session_t * s) vl_msg_api_send_shmem (q, (u8 *) & mp); } +static void +send_session_reset_callback (stream_session_t * s) +{ + vl_api_reset_sock_t *mp; + unix_shared_memory_queue_t *q; + application_t *app = application_get (s->app_index); + + q = vl_api_client_index_to_input_queue (app->api_client_index); + + if (!q) + 
return; + + mp = vl_msg_api_alloc (sizeof (*mp)); + memset (mp, 0, sizeof (*mp)); + mp->_vl_msg_id = clib_host_to_net_u16 (VL_API_RESET_SOCK); + + mp->handle = make_session_handle (s); + vl_msg_api_send_shmem (q, (u8 *) & mp); +} + /** * Redirect a connect_uri message to the indicated server. * Only sent if the server has bound the related port with @@ -414,6 +455,7 @@ static session_cb_vft_t uri_session_cb_vft = { .session_accept_callback = send_session_accept_uri_callback, .session_disconnect_callback = send_session_disconnect_uri_callback, .session_connected_callback = send_session_connected_uri_callback, + .session_reset_callback = send_session_reset_uri_callback, .add_segment_callback = send_add_segment_callback, .redirect_connect_callback = redirect_connect_uri_callback }; @@ -422,6 +464,7 @@ static session_cb_vft_t session_cb_vft = { .session_accept_callback = send_session_accept_callback, .session_disconnect_callback = send_session_disconnect_callback, .session_connected_callback = send_session_connected_callback, + .session_reset_callback = send_session_reset_callback, .add_segment_callback = send_add_segment_callback, .redirect_connect_callback = redirect_connect_callback }; @@ -548,8 +591,8 @@ vl_api_disconnect_session_t_handler (vl_api_disconnect_session_t * mp) rv = api_session_not_valid (mp->session_index, mp->session_thread_index); if (!rv) - rv = vnet_disconnect_session (mp->client_index, mp->session_index, - mp->session_thread_index); + rv = + vnet_disconnect_session (mp->session_index, mp->session_thread_index); REPLY_MACRO (VL_API_DISCONNECT_SESSION_REPLY); } @@ -572,8 +615,7 @@ vl_api_disconnect_session_reply_t_handler (vl_api_disconnect_session_reply_t * } /* Disconnect has been confirmed. 
Confirm close to transport */ - vnet_disconnect_session (mp->client_index, mp->session_index, - mp->session_thread_index); + vnet_disconnect_session (mp->session_index, mp->session_thread_index); } static void diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index f486dbb289f..0da30261bef 100644 --- a/src/vnet/session/transport.h +++ b/src/vnet/session/transport.h @@ -74,7 +74,7 @@ typedef struct _transport_proto_vft u32 (*push_header) (transport_connection_t * tconn, vlib_buffer_t * b); u16 (*send_mss) (transport_connection_t * tc); u32 (*send_space) (transport_connection_t * tc); - u32 (*rx_fifo_offset) (transport_connection_t * tc); + u32 (*tx_fifo_offset) (transport_connection_t * tc); /* * Connection retrieval @@ -92,39 +92,39 @@ typedef struct _transport_proto_vft } transport_proto_vft_t; +/* *INDENT-OFF* */ /* 16 octets */ -typedef CLIB_PACKED (struct - { - union - { - struct - { - ip4_address_t src; ip4_address_t dst; - u16 src_port; - u16 dst_port; - /* align by making this 4 octets even though its a 1-bit field - * NOTE: avoid key overlap with other transports that use 5 tuples for - * session identification. - */ - u32 proto; - }; - u64 as_u64[2]; - }; - }) v4_connection_key_t; - -typedef CLIB_PACKED (struct - { - union - { - struct - { - /* 48 octets */ - ip6_address_t src; ip6_address_t dst; - u16 src_port; - u16 dst_port; u32 proto; u8 unused_for_now[8]; - }; u64 as_u64[6]; - }; - }) v6_connection_key_t; +typedef CLIB_PACKED (struct { + union + { + struct + { + ip4_address_t src; ip4_address_t dst; + u16 src_port; + u16 dst_port; + /* align by making this 4 octets even though its a 1-bit field + * NOTE: avoid key overlap with other transports that use 5 tuples for + * session identification. 
+ */ + u32 proto; + }; + u64 as_u64[2]; + }; +}) v4_connection_key_t; + +typedef CLIB_PACKED (struct { + union + { + struct + { + /* 48 octets */ + ip6_address_t src; ip6_address_t dst; + u16 src_port; + u16 dst_port; u32 proto; u8 unused_for_now[8]; + }; u64 as_u64[6]; + }; +}) v6_connection_key_t; +/* *INDENT-ON* */ typedef clib_bihash_kv_16_8_t session_kv4_t; typedef clib_bihash_kv_48_8_t session_kv6_t; diff --git a/src/vnet/tcp/builtin_server.c b/src/vnet/tcp/builtin_server.c index be65642ae3b..9b697a01d5f 100644 --- a/src/vnet/tcp/builtin_server.c +++ b/src/vnet/tcp/builtin_server.c @@ -18,10 +18,24 @@ #include #include +typedef struct +{ + u8 *rx_buf; + unix_shared_memory_queue_t **vpp_queue; + vlib_main_t *vlib_main; +} builtin_server_main_t; + +builtin_server_main_t builtin_server_main; + + int builtin_session_accept_callback (stream_session_t * s) { + builtin_server_main_t *bsm = &builtin_server_main; clib_warning ("called..."); + + bsm->vpp_queue[s->thread_index] = + session_manager_get_vpp_event_queue (s->thread_index); s->session_state = SESSION_STATE_READY; return 0; } @@ -30,8 +44,19 @@ void builtin_session_disconnect_callback (stream_session_t * s) { clib_warning ("called..."); + + vnet_disconnect_session (s->session_index, s->thread_index); } +void +builtin_session_reset_callback (stream_session_t * s) +{ + clib_warning ("called.. 
"); + + stream_session_cleanup (s); +} + + int builtin_session_connected_callback (u32 client_index, stream_session_t * s, u8 is_fail) @@ -56,9 +81,57 @@ builtin_redirect_connect_callback (u32 client_index, void *mp) } int -builtin_server_rx_callback (stream_session_t * s) +builtin_server_rx_callback (stream_session_t * s, session_fifo_event_t * e) { - clib_warning ("called..."); + int n_written, bytes, total_copy_bytes; + int n_read; + svm_fifo_t *tx_fifo; + builtin_server_main_t *bsm = &builtin_server_main; + session_fifo_event_t evt; + static int serial_number = 0; + + bytes = e->enqueue_length; + if (PREDICT_FALSE (bytes <= 0)) + { + clib_warning ("bizarre rx callback: bytes %d", bytes); + return 0; + } + + tx_fifo = s->server_tx_fifo; + + /* Number of bytes we're going to copy */ + total_copy_bytes = (bytes < (tx_fifo->nitems - tx_fifo->cursize)) ? bytes : + tx_fifo->nitems - tx_fifo->cursize; + + if (PREDICT_FALSE (total_copy_bytes <= 0)) + { + clib_warning ("no space in tx fifo, event had %d bytes", bytes); + return 0; + } + + vec_validate (bsm->rx_buf, total_copy_bytes - 1); + _vec_len (bsm->rx_buf) = total_copy_bytes; + + n_read = svm_fifo_dequeue_nowait (s->server_rx_fifo, 0, total_copy_bytes, + bsm->rx_buf); + ASSERT (n_read == total_copy_bytes); + + /* + * Echo back + */ + + n_written = svm_fifo_enqueue_nowait (tx_fifo, 0, n_read, bsm->rx_buf); + ASSERT (n_written == total_copy_bytes); + + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_SERVER_TX; + evt.enqueue_length = total_copy_bytes; + evt.event_id = serial_number++; + + unix_shared_memory_queue_add (bsm->vpp_queue[s->thread_index], (u8 *) & evt, + 0 /* do wait for mutex */ ); + return 0; } @@ -68,7 +141,8 @@ static session_cb_vft_t builtin_session_cb_vft = { .session_connected_callback = builtin_session_connected_callback, .add_segment_callback = builtin_add_segment_callback, .redirect_connect_callback = builtin_redirect_connect_callback, - 
.builtin_server_rx_callback = builtin_server_rx_callback + .builtin_server_rx_callback = builtin_server_rx_callback, + .session_reset_callback = builtin_session_reset_callback }; static int @@ -77,6 +151,11 @@ server_create (vlib_main_t * vm) vnet_bind_args_t _a, *a = &_a; u64 options[SESSION_OPTIONS_N_OPTIONS]; char segment_name[128]; + u32 num_threads; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (builtin_server_main.vpp_queue, num_threads - 1); memset (a, 0, sizeof (*a)); memset (options, 0, sizeof (options)); @@ -110,6 +189,7 @@ server_create_command_fn (vlib_main_t * vm, } #endif + vnet_session_enable_disable (vm, 1 /* turn on TCP, etc. */ ); rv = server_create (vm); switch (rv) { @@ -121,10 +201,14 @@ server_create_command_fn (vlib_main_t * vm, return 0; } +/* *INDENT-OFF* */ VLIB_CLI_COMMAND (server_create_command, static) = { -.path = "test server",.short_help = "test server",.function = - server_create_command_fn,}; + .path = "test server", + .short_help = "test server", + .function = server_create_command_fn, +}; +/* *INDENT-ON* */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/vnet/tcp/tcp.c b/src/vnet/tcp/tcp.c index 69433e26a8c..d2df5c3eae0 100644 --- a/src/vnet/tcp/tcp.c +++ b/src/vnet/tcp/tcp.c @@ -139,6 +139,20 @@ tcp_connection_del (tcp_connection_t * tc) tcp_connection_cleanup (tc); } +/** Notify session that connection has been reset. + * + * Switch state to closed and wait for session to call cleanup. + */ +void +tcp_connection_reset (tcp_connection_t * tc) +{ + if (tc->state == TCP_STATE_CLOSED) + return; + + tc->state = TCP_STATE_CLOSED; + stream_session_reset_notify (&tc->connection); +} + /** * Begin connection closing procedure. * @@ -149,6 +163,8 @@ tcp_connection_del (tcp_connection_t * tc) * calls cleanup. * 2) TIME_WAIT (active close) whereby after 2MSL the 2MSL timer triggers * and cleanup is called. + * + * N.B. 
Half-close connections are not supported */ void tcp_connection_close (tcp_connection_t * tc) @@ -166,9 +182,9 @@ tcp_connection_close (tcp_connection_t * tc) else if (tc->state == TCP_STATE_CLOSE_WAIT) tc->state = TCP_STATE_LAST_ACK; - /* Half-close connections are not supported XXX */ - - if (tc->state == TCP_STATE_CLOSED) + /* If in CLOSED and WAITCLOSE timer is not set, delete connection now */ + if (tc->timers[TCP_TIMER_WAITCLOSE] == TCP_TIMER_HANDLE_INVALID + && tc->state == TCP_STATE_CLOSED) tcp_connection_del (tc); } @@ -185,7 +201,10 @@ tcp_session_cleanup (u32 conn_index, u32 thread_index) { tcp_connection_t *tc; tc = tcp_connection_get (conn_index, thread_index); - tcp_connection_cleanup (tc); + + /* Wait for the session tx events to clear */ + tc->state = TCP_STATE_CLOSED; + tcp_timer_update (tc, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); } void * @@ -227,7 +246,8 @@ tcp_allocate_local_port (tcp_main_t * tm, ip46_address_t * ip) { transport_endpoint_t *tep; u32 time_now, tei; - u16 min = 1024, max = 65535, tries; /* XXX configurable ? */ + u16 min = 1024, max = 65535; /* XXX configurable ? 
*/ + int tries; tries = max - min; time_now = tcp_time_now (); @@ -505,10 +525,10 @@ tcp_session_send_space (transport_connection_t * trans_conn) } u32 -tcp_session_rx_fifo_offset (transport_connection_t * trans_conn) +tcp_session_tx_fifo_offset (transport_connection_t * trans_conn) { tcp_connection_t *tc = (tcp_connection_t *) trans_conn; - return (tc->snd_una_max - tc->snd_una); + return (tc->snd_nxt - tc->snd_una); } /* *INDENT-OFF* */ @@ -524,7 +544,7 @@ const static transport_proto_vft_t tcp4_proto = { .cleanup = tcp_session_cleanup, .send_mss = tcp_session_send_mss, .send_space = tcp_session_send_space, - .rx_fifo_offset = tcp_session_rx_fifo_offset, + .tx_fifo_offset = tcp_session_tx_fifo_offset, .format_connection = format_tcp_session_ip4, .format_listener = format_tcp_listener_session_ip4, .format_half_open = format_tcp_half_open_session_ip4 @@ -542,7 +562,7 @@ const static transport_proto_vft_t tcp6_proto = { .cleanup = tcp_session_cleanup, .send_mss = tcp_session_send_mss, .send_space = tcp_session_send_space, - .rx_fifo_offset = tcp_session_rx_fifo_offset, + .tx_fifo_offset = tcp_session_tx_fifo_offset, .format_connection = format_tcp_session_ip6, .format_listener = format_tcp_listener_session_ip6, .format_half_open = format_tcp_half_open_session_ip6 @@ -579,13 +599,32 @@ tcp_timer_establish_handler (u32 conn_index) } void -tcp_timer_2msl_handler (u32 conn_index) +tcp_timer_waitclose_handler (u32 conn_index) { u32 cpu_index = os_get_cpu_number (); tcp_connection_t *tc; tc = tcp_connection_get (conn_index, cpu_index); - tc->timers[TCP_TIMER_2MSL] = TCP_TIMER_HANDLE_INVALID; + tc->timers[TCP_TIMER_WAITCLOSE] = TCP_TIMER_HANDLE_INVALID; + + /* Session didn't come back with a close(). Send FIN either way + * and switch to LAST_ACK. */ + if (tc->state == TCP_STATE_CLOSE_WAIT) + { + if (tc->flags & TCP_CONN_FINSNT) + { + clib_warning ("FIN was sent and still in CLOSE WAIT. 
Weird!"); + } + + tcp_send_fin (tc); + tc->state = TCP_STATE_LAST_ACK; + + /* Make sure we don't wait in LAST ACK forever */ + tcp_timer_set (tc, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); + + /* Don't delete the connection yet */ + return; + } tcp_connection_del (tc); } @@ -597,7 +636,7 @@ static timer_expiration_handler *timer_expiration_handlers[TCP_N_TIMERS] = tcp_timer_delack_handler, 0, tcp_timer_keep_handler, - tcp_timer_2msl_handler, + tcp_timer_waitclose_handler, tcp_timer_retransmit_syn_handler, tcp_timer_establish_handler }; diff --git a/src/vnet/tcp/tcp.h b/src/vnet/tcp/tcp.h index 7d443433fa2..3b3d8fc7cff 100644 --- a/src/vnet/tcp/tcp.h +++ b/src/vnet/tcp/tcp.h @@ -63,8 +63,8 @@ format_function_t format_tcp_state; _(DELACK, "DELAYED ACK") \ _(PERSIST, "PERSIST") \ _(KEEP, "KEEP") \ - _(2MSL, "2MSL") \ - _(RETRANSMIT_SYN, "RETRANSMIT_SYN") \ + _(WAITCLOSE, "WAIT CLOSE") \ + _(RETRANSMIT_SYN, "RETRANSMIT SYN") \ _(ESTABLISH, "ESTABLISH") typedef enum _tcp_timers @@ -89,6 +89,8 @@ extern timer_expiration_handler tcp_timer_retransmit_syn_handler; #define TCP_DELACK_TIME 1 /* 0.1s */ #define TCP_ESTABLISH_TIME 750 /* 75s */ #define TCP_2MSL_TIME 300 /* 30s */ +#define TCP_CLOSEWAIT_TIME 1 /* 0.1s */ +#define TCP_CLEANUP_TIME 5 /* 0.5s Time to wait before cleanup */ #define TCP_RTO_MAX 60 * THZ /* Min max RTO (60s) as per RFC6298 */ #define TCP_RTT_MAX 30 * THZ /* 30s (probably too much) */ @@ -102,6 +104,7 @@ void tcp_update_time (f64 now, u32 thread_index); _(DELACK, "Delay ACK") \ _(SNDACK, "Send ACK") \ _(BURSTACK, "Burst ACK set") \ + _(FINSNT, "FIN sent") \ _(SENT_RCV_WND0, "Sent 0 receive window") \ _(RECOVERY, "Recovery on") \ _(FAST_RECOVERY, "Fast Recovery on") @@ -331,6 +334,8 @@ clib_error_t *vnet_tcp_enable_disable (vlib_main_t * vm, u8 is_en); always_inline tcp_connection_t * tcp_connection_get (u32 conn_index, u32 thread_index) { + if (pool_is_free_index (tcp_main.connections[thread_index], conn_index)) + return 0; return pool_elt_at_index 
(tcp_main.connections[thread_index], conn_index); } @@ -347,6 +352,7 @@ tcp_connection_get_if_valid (u32 conn_index, u32 thread_index) void tcp_connection_close (tcp_connection_t * tc); void tcp_connection_cleanup (tcp_connection_t * tc); void tcp_connection_del (tcp_connection_t * tc); +void tcp_connection_reset (tcp_connection_t * tc); always_inline tcp_connection_t * tcp_listener_get (u32 tli) @@ -361,7 +367,7 @@ tcp_half_open_connection_get (u32 conn_index) } void tcp_make_ack (tcp_connection_t * ts, vlib_buffer_t * b); -void tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b); +void tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b); void tcp_make_synack (tcp_connection_t * ts, vlib_buffer_t * b); void tcp_send_reset (vlib_buffer_t * pkt, u8 is_ip4); void tcp_send_syn (tcp_connection_t * tc); @@ -467,7 +473,7 @@ tcp_timer_set (tcp_connection_t * tc, u8 timer_id, u32 interval) } always_inline void -tcp_retransmit_timer_set (tcp_main_t * tm, tcp_connection_t * tc) +tcp_retransmit_timer_set (tcp_connection_t * tc) { /* XXX Switch to faster TW */ tcp_timer_set (tc, TCP_TIMER_RETRANSMIT, diff --git a/src/vnet/tcp/tcp_error.def b/src/vnet/tcp/tcp_error.def index cff5ec13a70..2dbdd9b38d1 100644 --- a/src/vnet/tcp/tcp_error.def +++ b/src/vnet/tcp/tcp_error.def @@ -17,13 +17,13 @@ tcp_error (NONE, "no error") tcp_error (NO_LISTENER, "no listener for dst port") tcp_error (LOOKUP_DROPS, "lookup drops") tcp_error (DISPATCH, "Dispatch error") -tcp_error (ENQUEUED, "Packets pushed into rx fifo") +tcp_error (ENQUEUED, "Packets pushed into rx fifo") tcp_error (PURE_ACK, "Pure acks") tcp_error (SYNS_RCVD, "SYNs received") tcp_error (SYN_ACKS_RCVD, "SYN-ACKs received") -tcp_error (NOT_READY, "Session not ready for packets") -tcp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") -tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +tcp_error (NOT_READY, "Session not ready for packets") +tcp_error (FIFO_FULL, "Packets dropped for 
lack of rx fifo space") +tcp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") tcp_error (API_QUEUE_FULL, "Sessions not created for lack of API queue space") tcp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") tcp_error (SEGMENT_INVALID, "Invalid segment") @@ -32,4 +32,5 @@ tcp_error (ACK_DUP, "Duplicate ACK") tcp_error (ACK_OLD, "Old ACK") tcp_error (PKTS_SENT, "Packets sent") tcp_error (FILTERED_DUPACKS, "Filtered duplicate ACKs") -tcp_error (RST_SENT, "Resets sent") \ No newline at end of file +tcp_error (RST_SENT, "Resets sent") +tcp_error (INVALID_CONNECTION, "Invalid connection") diff --git a/src/vnet/tcp/tcp_input.c b/src/vnet/tcp/tcp_input.c index 0a907d0a3d7..f19fbf874dc 100644 --- a/src/vnet/tcp/tcp_input.c +++ b/src/vnet/tcp/tcp_input.c @@ -274,10 +274,7 @@ tcp_segment_validate (vlib_main_t * vm, tcp_connection_t * tc0, /* 2nd: check the RST bit */ if (tcp_rst (th0)) { - /* Notify session that connection has been reset. Switch - * state to closed and await for session to do the cleanup. */ - stream_session_reset_notify (&tc0->connection); - tc0->state = TCP_STATE_CLOSED; + tcp_connection_reset (tc0); return -1; } @@ -1023,6 +1020,12 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, my_thread_index); + if (PREDICT_FALSE (tc0 == 0)) + { + error0 = TCP_ERROR_INVALID_CONNECTION; + goto drop; + } + /* Checksum computed by ipx_local no need to compute again */ if (is_ip4) @@ -1072,12 +1075,12 @@ tcp46_established_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* 8: check the FIN bit */ if (tcp_fin (th0)) { - /* Send ACK and enter CLOSE-WAIT */ - tcp_make_ack (tc0, b0); - tcp_connection_force_ack (tc0, b0); - next0 = tcp_next_output (tc0->c_is_ip4); + /* Enter CLOSE-WAIT and notify session. Don't send ACK, instead + * wait for session to call close. To avoid lingering + * in CLOSE-WAIT, set timer (reuse WAITCLOSE). 
*/ tc0->state = TCP_STATE_CLOSE_WAIT; stream_session_disconnect_notify (&tc0->connection); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_CLOSEWAIT_TIME); } drop: @@ -1468,7 +1471,7 @@ VLIB_REGISTER_NODE (tcp6_syn_sent_node) = VLIB_NODE_FUNCTION_MULTIARCH (tcp6_syn_sent_node, tcp6_syn_sent_rcv); /** - * Handles reception for all states except LISTEN, SYN-SEND and ESTABLISHED + * Handles reception for all states except LISTEN, SYN-SENT and ESTABLISHED * as per RFC793 p. 64 */ always_inline uword @@ -1511,6 +1514,11 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, b0 = vlib_get_buffer (vm, bi0); tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, my_thread_index); + if (PREDICT_FALSE (tc0 == 0)) + { + error0 = TCP_ERROR_INVALID_CONNECTION; + goto drop; + } /* Checksum computed by ipx_local no need to compute again */ @@ -1587,7 +1595,8 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Shoulder tap the server */ stream_session_accept_notify (&tc0->connection); - tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT_SYN); + /* Reset SYN-ACK retransmit timer */ + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT); break; case TCP_STATE_ESTABLISHED: /* We can get packets in established state here because they @@ -1602,9 +1611,14 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, * continue processing in that state. 
*/ if (tcp_rcv_ack (tc0, b0, tcp0, &next0, &error0)) goto drop; - tc0->state = TCP_STATE_FIN_WAIT_2; - /* Stop all timers, 2MSL will be set lower */ - tcp_connection_timers_reset (tc0); + + /* If FIN is ACKed */ + if (tc0->snd_una == tc0->snd_una_max) + { + tc0->state = TCP_STATE_FIN_WAIT_2; + /* Stop all timers, 2MSL will be set lower */ + tcp_connection_timers_reset (tc0); + } break; case TCP_STATE_FIN_WAIT_2: /* In addition to the processing for the ESTABLISHED state, if @@ -1639,7 +1653,17 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, if (!tcp_rcv_ack_is_acceptable (tc0, b0)) goto drop; - tcp_connection_del (tc0); + tc0->state = TCP_STATE_CLOSED; + + /* Don't delete the connection/session yet. Instead, wait a + * reasonable amount of time until the pipes are cleared. In + * particular, this makes sure that we won't have dead sessions + * when processing events on the tx path */ + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_CLEANUP_TIME); + + /* Stop retransmit */ + tcp_timer_reset (tc0, TCP_TIMER_RETRANSMIT); + goto drop; break; @@ -1684,7 +1708,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, case TCP_STATE_SYN_RCVD: /* Send FIN-ACK notify app and enter CLOSE-WAIT */ tcp_connection_timers_reset (tc0); - tcp_make_finack (tc0, b0); + tcp_make_fin (tc0, b0); next0 = tcp_next_output (tc0->c_is_ip4); stream_session_disconnect_notify (&tc0->connection); tc0->state = TCP_STATE_CLOSE_WAIT; @@ -1697,12 +1721,12 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, case TCP_STATE_FIN_WAIT_1: tc0->state = TCP_STATE_TIME_WAIT; tcp_connection_timers_reset (tc0); - tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); break; case TCP_STATE_FIN_WAIT_2: /* Got FIN, send ACK! 
*/ tc0->state = TCP_STATE_TIME_WAIT; - tcp_timer_set (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_timer_set (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); tcp_make_ack (tc0, b0); next0 = tcp_next_output (is_ip4); break; @@ -1710,7 +1734,7 @@ tcp46_rcv_process_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Remain in the TIME-WAIT state. Restart the 2 MSL time-wait * timeout. */ - tcp_timer_update (tc0, TCP_TIMER_2MSL, TCP_2MSL_TIME); + tcp_timer_update (tc0, TCP_TIMER_WAITCLOSE, TCP_2MSL_TIME); break; } @@ -2113,6 +2137,7 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); + vnet_buffer (b0)->tcp.flags = 0; if (is_ip4) { @@ -2168,7 +2193,6 @@ tcp46_input_inline (vlib_main_t * vm, vlib_node_runtime_t * node, /* Send reset */ next0 = TCP_INPUT_NEXT_RESET; error0 = TCP_ERROR_NO_LISTENER; - vnet_buffer (b0)->tcp.flags = 0; } b0->error = error0 ? node->errors[error0] : 0; @@ -2288,6 +2312,7 @@ do { \ _(ESTABLISHED, TCP_FLAG_FIN, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); _(ESTABLISHED, TCP_FLAG_FIN | TCP_FLAG_ACK, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); + _(ESTABLISHED, TCP_FLAG_RST, TCP_INPUT_NEXT_ESTABLISHED, TCP_ERROR_NONE); /* ACK or FIN-ACK to our FIN */ _(FIN_WAIT_1, TCP_FLAG_ACK, TCP_INPUT_NEXT_RCV_PROCESS, TCP_ERROR_NONE); _(FIN_WAIT_1, TCP_FLAG_ACK | TCP_FLAG_FIN, TCP_INPUT_NEXT_RCV_PROCESS, diff --git a/src/vnet/tcp/tcp_output.c b/src/vnet/tcp/tcp_output.c index 7e431cd0454..aa43e9f37f0 100644 --- a/src/vnet/tcp/tcp_output.c +++ b/src/vnet/tcp/tcp_output.c @@ -396,6 +396,7 @@ tcp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) /* Leave enough space for headers */ vlib_buffer_make_headroom (b, MAX_HDRS_LEN); + vnet_buffer (b)->tcp.flags = 0; } /** @@ -443,16 +444,22 @@ tcp_make_ack (tcp_connection_t * tc, vlib_buffer_t * b) * Convert buffer to FIN-ACK */ void -tcp_make_finack (tcp_connection_t * tc, vlib_buffer_t * b) +tcp_make_fin (tcp_connection_t * tc, vlib_buffer_t * b) { 
tcp_main_t *tm = vnet_get_tcp_main (); vlib_main_t *vm = tm->vlib_main; + u8 flags = 0; tcp_reuse_buffer (vm, b); - tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, TCP_FLAG_ACK | TCP_FLAG_FIN); + + if (tc->rcv_las == tc->rcv_nxt) + flags = TCP_FLAG_FIN; + else + flags = TCP_FLAG_FIN | TCP_FLAG_ACK; + + tcp_make_ack_i (tc, b, TCP_STATE_ESTABLISHED, flags); /* Reset flags, make sure ack is sent */ - tc->flags = TCP_CONN_SNDACK; vnet_buffer (b)->tcp.flags &= ~TCP_BUF_FLAG_DUPACK; tc->snd_nxt += 1; @@ -500,7 +507,7 @@ tcp_make_synack (tcp_connection_t * tc, vlib_buffer_t * b) vnet_buffer (b)->tcp.flags = TCP_BUF_FLAG_ACK; /* Init retransmit timer */ - tcp_retransmit_timer_set (tm, tc); + tcp_retransmit_timer_set (tc); } always_inline void @@ -818,9 +825,9 @@ tcp_send_fin (tcp_connection_t * tc) /* Leave enough space for headers */ vlib_buffer_make_headroom (b, MAX_HDRS_LEN); - tcp_make_finack (tc, b); - + tcp_make_fin (tc, b); tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); + tc->flags |= TCP_CONN_FINSNT; } always_inline u8 @@ -1038,7 +1045,7 @@ tcp_timer_retransmit_handler_i (u32 index, u8 is_syn) tcp_enqueue_to_output (vm, b, bi, tc->c_is_ip4); /* Re-enable retransmit timer */ - tcp_retransmit_timer_set (tm, tc); + tcp_retransmit_timer_set (tc); } else { @@ -1139,7 +1146,6 @@ tcp46_output_inline (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * from_frame, int is_ip4) { - tcp_main_t *tm = vnet_get_tcp_main (); u32 n_left_from, next_index, *from, *to_next; u32 my_thread_index = vm->cpu_index; @@ -1172,6 +1178,13 @@ tcp46_output_inline (vlib_main_t * vm, b0 = vlib_get_buffer (vm, bi0); tc0 = tcp_connection_get (vnet_buffer (b0)->tcp.connection_index, my_thread_index); + if (PREDICT_FALSE (tc0 == 0 || tc0->state == TCP_STATE_CLOSED)) + { + error0 = TCP_ERROR_INVALID_CONNECTION; + next0 = TCP_OUTPUT_NEXT_DROP; + goto done; + } + th0 = vlib_buffer_get_current (b0); if (is_ip4) @@ -1229,6 +1242,22 @@ tcp46_output_inline (vlib_main_t * vm, tc0->rtt_ts = 
tcp_time_now (); tc0->rtt_seq = tc0->snd_nxt; } + + if (1) + { + ELOG_TYPE_DECLARE (e) = + { + .format = + "output: snd_una %u snd_una_max %u",.format_args = + "i4i4",}; + struct + { + u32 data[2]; + } *ed; + ed = ELOG_DATA (&vm->elog_main, e); + ed->data[0] = tc0->snd_una - tc0->iss; + ed->data[1] = tc0->snd_una_max - tc0->iss; + } } /* Set the retransmit timer if not set already and not @@ -1236,7 +1265,7 @@ tcp46_output_inline (vlib_main_t * vm, if (!tcp_timer_is_active (tc0, TCP_TIMER_RETRANSMIT) && tc0->snd_nxt != tc0->snd_una) { - tcp_retransmit_timer_set (tm, tc0); + tcp_retransmit_timer_set (tc0); tc0->rto_boff = 0; } diff --git a/src/vnet/udp/builtin_server.c b/src/vnet/udp/builtin_server.c index afa66ba4453..46c8e734b3a 100644 --- a/src/vnet/udp/builtin_server.c +++ b/src/vnet/udp/builtin_server.c @@ -39,7 +39,7 @@ builtin_session_disconnect_callback (stream_session_t * s) } static int -builtin_server_rx_callback (stream_session_t * s) +builtin_server_rx_callback (stream_session_t * s, session_fifo_event_t * ep) { svm_fifo_t *rx_fifo, *tx_fifo; u32 this_transfer; diff --git a/src/vnet/udp/udp_input.c b/src/vnet/udp/udp_input.c index 4d509335c32..88278735b55 100644 --- a/src/vnet/udp/udp_input.c +++ b/src/vnet/udp/udp_input.c @@ -244,19 +244,19 @@ udp4_uri_input_node_fn (vlib_main_t * vm, /* Get session's server */ server0 = application_get (s0->app_index); - /* Built-in server? Deliver the goods... */ - if (server0->cb_fns.builtin_server_rx_callback) - { - server0->cb_fns.builtin_server_rx_callback (s0); - continue; - } - /* Fabricate event */ evt.fifo = s0->server_rx_fifo; evt.event_type = FIFO_EVENT_SERVER_RX; evt.event_id = serial_number++; evt.enqueue_length = svm_fifo_max_dequeue (s0->server_rx_fifo); + /* Built-in server? Deliver the goods... 
*/ + if (server0->cb_fns.builtin_server_rx_callback) + { + server0->cb_fns.builtin_server_rx_callback (s0, &evt); + continue; + } + /* Add event to server's event queue */ q = server0->event_queue; diff --git a/src/vnet/unix/tapcli.c b/src/vnet/unix/tapcli.c index 496f38851e9..fb1a8bac7d4 100644 --- a/src/vnet/unix/tapcli.c +++ b/src/vnet/unix/tapcli.c @@ -1435,7 +1435,8 @@ done: VLIB_CLI_COMMAND (tap_connect_command, static) = { .path = "tap connect", - .short_help = "tap connect <intfc-name> [hwaddr <mac-addr>]", + .short_help = + "tap connect <intfc-name> [address <ip-addr>/mw] [hwaddr <mac-addr>]", .function = tap_connect_command_fn, }; -- 2.16.6