From a74f0ef0a54e821da1737743af64960354b74bf7 Mon Sep 17 00:00:00 2001 From: Mohammed Hawari Date: Thu, 9 Oct 2025 17:22:01 +0200 Subject: [PATCH] sfdp: StateFul Data Plane Change-Id: I229548afbae609f8a15047821e811874aa788a53 Type: feature Signed-off-by: Mohammed Hawari --- MAINTAINERS | 6 + docs/spelling_wordlist.txt | 3 + src/vnet/CMakeLists.txt | 49 ++ src/vnet/sfdp/api.c | 266 ++++++++++ src/vnet/sfdp/callbacks.c | 19 + src/vnet/sfdp/callbacks.h | 126 +++++ src/vnet/sfdp/cli.c | 551 +++++++++++++++++++ src/vnet/sfdp/common.h | 53 ++ src/vnet/sfdp/drop/node.c | 103 ++++ src/vnet/sfdp/expiry/expiry.c | 165 ++++++ src/vnet/sfdp/expiry/expiry.h | 116 ++++ src/vnet/sfdp/expiry/expiry_cli.c | 87 +++ src/vnet/sfdp/format.c | 478 +++++++++++++++++ src/vnet/sfdp/lookup/full_reass_node.c | 155 ++++++ src/vnet/sfdp/lookup/icmp_error_node.c | 398 ++++++++++++++ src/vnet/sfdp/lookup/lookup.h | 30 ++ src/vnet/sfdp/lookup/lookup_common.h | 171 ++++++ src/vnet/sfdp/lookup/lookup_inlines.h | 17 + src/vnet/sfdp/lookup/lookup_ip4.h | 215 ++++++++ src/vnet/sfdp/lookup/lookup_ip6.h | 269 ++++++++++ src/vnet/sfdp/lookup/node.c | 938 +++++++++++++++++++++++++++++++++ src/vnet/sfdp/lookup/parser.c | 62 +++ src/vnet/sfdp/lookup/parser.h | 126 +++++ src/vnet/sfdp/lookup/parser_inlines.h | 646 +++++++++++++++++++++++ src/vnet/sfdp/lookup/reass.c | 74 +++ src/vnet/sfdp/lookup/reass.h | 24 + src/vnet/sfdp/lookup/sfdp_bihashes.h | 40 ++ src/vnet/sfdp/lookup/sv_reass_node.c | 148 ++++++ src/vnet/sfdp/service.c | 347 ++++++++++++ src/vnet/sfdp/service.h | 99 ++++ src/vnet/sfdp/sfdp.api | 115 ++++ src/vnet/sfdp/sfdp.c | 550 +++++++++++++++++++ src/vnet/sfdp/sfdp.h | 891 +++++++++++++++++++++++++++++++ src/vnet/sfdp/sfdp_funcs.h | 308 +++++++++++ src/vnet/sfdp/sfdp_types.api | 48 ++ src/vnet/sfdp/sfdp_types_funcs.h | 90 ++++ src/vnet/sfdp/timer/timer.c | 161 ++++++ src/vnet/sfdp/timer/timer.h | 142 +++++ 38 files changed, 8086 insertions(+) create mode 100644 src/vnet/sfdp/api.c create mode 
100644 src/vnet/sfdp/callbacks.c create mode 100644 src/vnet/sfdp/callbacks.h create mode 100644 src/vnet/sfdp/cli.c create mode 100644 src/vnet/sfdp/common.h create mode 100644 src/vnet/sfdp/drop/node.c create mode 100644 src/vnet/sfdp/expiry/expiry.c create mode 100644 src/vnet/sfdp/expiry/expiry.h create mode 100644 src/vnet/sfdp/expiry/expiry_cli.c create mode 100644 src/vnet/sfdp/format.c create mode 100644 src/vnet/sfdp/lookup/full_reass_node.c create mode 100644 src/vnet/sfdp/lookup/icmp_error_node.c create mode 100644 src/vnet/sfdp/lookup/lookup.h create mode 100644 src/vnet/sfdp/lookup/lookup_common.h create mode 100644 src/vnet/sfdp/lookup/lookup_inlines.h create mode 100644 src/vnet/sfdp/lookup/lookup_ip4.h create mode 100644 src/vnet/sfdp/lookup/lookup_ip6.h create mode 100644 src/vnet/sfdp/lookup/node.c create mode 100644 src/vnet/sfdp/lookup/parser.c create mode 100644 src/vnet/sfdp/lookup/parser.h create mode 100644 src/vnet/sfdp/lookup/parser_inlines.h create mode 100644 src/vnet/sfdp/lookup/reass.c create mode 100644 src/vnet/sfdp/lookup/reass.h create mode 100644 src/vnet/sfdp/lookup/sfdp_bihashes.h create mode 100644 src/vnet/sfdp/lookup/sv_reass_node.c create mode 100644 src/vnet/sfdp/service.c create mode 100644 src/vnet/sfdp/service.h create mode 100644 src/vnet/sfdp/sfdp.api create mode 100644 src/vnet/sfdp/sfdp.c create mode 100644 src/vnet/sfdp/sfdp.h create mode 100644 src/vnet/sfdp/sfdp_funcs.h create mode 100644 src/vnet/sfdp/sfdp_types.api create mode 100644 src/vnet/sfdp/sfdp_types_funcs.h create mode 100644 src/vnet/sfdp/timer/timer.c create mode 100644 src/vnet/sfdp/timer/timer.h diff --git a/MAINTAINERS b/MAINTAINERS index 647dd1b2630..6ba9e56a791 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -217,6 +217,12 @@ I: span M: N/A F: src/vnet/span +VNET StateFul Data Plane +I: sfdp +M: Mohammed Hawari +M: Ole Troan +F: src/vnet/sfdp/ + Plugin - Crypto - native I: crypto-native M: Damjan Marion diff --git a/docs/spelling_wordlist.txt 
b/docs/spelling_wordlist.txt index 73b26a430f0..3794cf83e85 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -1036,6 +1036,8 @@ setjmp settingupenvironment setUp setUpClass +sfdp +SFDP sflow sFlow sfr @@ -1061,6 +1063,7 @@ snat socketsvr socksvr sourceNATing +sp spd Specialisations spinlock diff --git a/src/vnet/CMakeLists.txt b/src/vnet/CMakeLists.txt index 8d62922bf48..5e54ae09df8 100644 --- a/src/vnet/CMakeLists.txt +++ b/src/vnet/CMakeLists.txt @@ -1399,6 +1399,55 @@ list(APPEND VNET_API_FILES ip6-nd/rd_cp.api ) +############################################################################## +# SFDP - StateFul Data Plane Infra +############################################################################## +list (APPEND VNET_SOURCES + sfdp/lookup/node.c + sfdp/lookup/icmp_error_node.c + sfdp/lookup/parser.c + sfdp/drop/node.c + sfdp/format.c + sfdp/sfdp.c + sfdp/service.c + sfdp/cli.c + sfdp/api.c + sfdp/timer/timer.c + sfdp/expiry/expiry.c + sfdp/expiry/expiry_cli.c + sfdp/callbacks.c +) + +list (APPEND VNET_HEADERS + sfdp/sfdp.h + sfdp/callbacks.h + sfdp/service.h + sfdp/sfdp_funcs.h + sfdp/sfdp_types_funcs.h + sfdp/common.h + sfdp/lookup/sfdp_bihashes.h + sfdp/lookup/lookup.h + sfdp/lookup/lookup_common.h + sfdp/lookup/lookup_ip4.h + sfdp/lookup/lookup_ip6.h + sfdp/lookup/lookup_inlines.h + sfdp/lookup/parser.h + sfdp/lookup/parser_inlines.h + sfdp/timer/timer.h + sfdp/expiry/expiry.h +) + +list (APPEND VNET_API_FILES + sfdp/sfdp_types.api + sfdp/sfdp.api +) + +list (APPEND VNET_MULTIARCH_SOURCES + sfdp/lookup/node.c + sfdp/lookup/parser.c + sfdp/drop/node.c +) + ############################################################################## # VNET Library ############################################################################## diff --git a/src/vnet/sfdp/api.c b/src/vnet/sfdp/api.c new file mode 100644 index 00000000000..0fab825f30c --- /dev/null +++ b/src/vnet/sfdp/api.c @@ -0,0 +1,266 @@ +/* SPDX-License-Identifier: Apache-2.0 
+ * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define REPLY_MSG_ID_BASE sfdp->msg_id_base +#include + +static void +vl_api_sfdp_tenant_add_del_t_handler (vl_api_sfdp_tenant_add_del_t *mp) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id); + u32 context_id = + mp->context_id == ~0 ? tenant_id : clib_net_to_host_u32 (mp->context_id); + u8 is_del = mp->is_del; + clib_error_t *err = + sfdp_tenant_add_del (sfdp, tenant_id, context_id, is_del); + vl_api_sfdp_tenant_add_del_reply_t *rmp; + int rv = err ? -1 : 0; + REPLY_MACRO (VL_API_SFDP_TENANT_ADD_DEL_REPLY); +} + +static void +vl_api_sfdp_set_services_t_handler (vl_api_sfdp_set_services_t *mp) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id); + sfdp_bitmap_t bitmap = 0; + u8 idx = 0; + u8 dir = sfdp_api_direction (mp->dir); + int rv; + for (uword i = 0; i < mp->n_services; i++) + { + char *cstring = (char *) mp->services[i].data; + unformat_input_t tmp; + unformat_init_string (&tmp, cstring, + strnlen (cstring, sizeof (mp->services[0].data))); + rv = unformat_user (&tmp, unformat_sfdp_service, &idx); + unformat_free (&tmp); + if (!rv) + { + rv = -1; + goto fail; + } + bitmap |= (1ULL << idx); + } + clib_error_t *err = sfdp_set_services (sfdp, tenant_id, bitmap, dir); + vl_api_sfdp_set_services_reply_t *rmp; + rv = err ? -1 : 0; +fail: + REPLY_MACRO (VL_API_SFDP_SET_SERVICES_REPLY); +} + +static void +vl_api_sfdp_set_timeout_t_handler (vl_api_sfdp_set_timeout_t *mp) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id); + u32 timeout_id = clib_net_to_host_u32 (mp->timeout_id); + u32 timeout_value = clib_net_to_host_u32 (mp->timeout_value); + clib_error_t *err = + sfdp_set_timeout (sfdp, tenant_id, timeout_id, timeout_value); + vl_api_sfdp_set_timeout_reply_t *rmp; + int rv = err ? 
-1 : 0; + REPLY_MACRO (VL_API_SFDP_SET_TIMEOUT_REPLY); +} + +static void +vl_api_sfdp_set_sp_node_t_handler (vl_api_sfdp_set_sp_node_t *mp) +{ + vl_api_sfdp_set_sp_node_reply_t *rmp; + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id); + u8 sp_node = sfdp_api_sp_node (mp->sp_node); + u32 node_index = clib_net_to_host_u32 (mp->node_index); + + clib_error_t *err = sfdp_set_sp_node (sfdp, tenant_id, sp_node, node_index); + int rv = err ? -1 : 0; + REPLY_MACRO (VL_API_SFDP_SET_SP_NODE_REPLY); +} + +static void +vl_api_sfdp_set_icmp_error_node_t_handler ( + vl_api_sfdp_set_icmp_error_node_t *mp) +{ + vl_api_sfdp_set_icmp_error_node_reply_t *rmp; + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id); + u8 is_ip6 = mp->is_ip6; + u32 node_index = clib_net_to_host_u32 (mp->node_index); + + clib_error_t *err = + sfdp_set_icmp_error_node (sfdp, tenant_id, is_ip6, node_index); + int rv = err ? -1 : 0; + REPLY_MACRO (VL_API_SFDP_SET_ICMP_ERROR_NODE_REPLY); +} + +static vl_api_sfdp_session_state_t +sfdp_session_state_encode (sfdp_session_state_t x) +{ + switch (x) + { + case SFDP_SESSION_STATE_FSOL: + return SFDP_API_SESSION_STATE_FSOL; + case SFDP_SESSION_STATE_ESTABLISHED: + return SFDP_API_SESSION_STATE_ESTABLISHED; + case SFDP_SESSION_STATE_TIME_WAIT: + return SFDP_API_SESSION_STATE_TIME_WAIT; + default: + return -1; + } +}; + +static void +sfdp_send_session_details (vl_api_registration_t *rp, u32 context, + u32 session_index, u32 thread_index, + sfdp_session_t *session) +{ + sfdp_main_t *sfdp = &sfdp_main; + vlib_main_t *vm = vlib_get_main (); + vl_api_sfdp_session_details_t *mp; + sfdp_session_ip46_key_t skey; + sfdp_tenant_t *tenant; + u32 tenant_id; + f64 now = vlib_time_now (vm); + size_t msg_size; + u8 n_keys = sfdp_session_n_keys (session); + tenant = sfdp_tenant_at_index (sfdp, session->tenant_idx); + tenant_id = tenant->tenant_id; + msg_size = sizeof (*mp) + sizeof (mp->keys[0]) * n_keys; + + mp 
= vl_msg_api_alloc_zero (msg_size); + mp->_vl_msg_id = ntohs (VL_API_SFDP_SESSION_DETAILS + sfdp->msg_id_base); + + /* fill in the message */ + mp->context = context; + mp->session_id = clib_host_to_net_u64 (session->session_id); + mp->thread_index = clib_host_to_net_u32 (thread_index); + mp->tenant_id = clib_host_to_net_u32 (tenant_id); + mp->session_idx = clib_host_to_net_u32 (session_index); + mp->session_type = sfdp_session_type_encode (session->type); + mp->protocol = ip_proto_encode (session->proto); + mp->state = sfdp_session_state_encode (session->state); + mp->remaining_time = + sfdp->expiry_callbacks.session_remaining_time (session, now); + mp->forward_bitmap = + clib_host_to_net_u64 (session->bitmaps[SFDP_FLOW_FORWARD]); + mp->reverse_bitmap = + clib_host_to_net_u64 (session->bitmaps[SFDP_FLOW_REVERSE]); + mp->n_keys = n_keys; + for (int i = 0; i < n_keys; i++) + { + if ((i == 0 && + session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4) || + (i == 1 && + session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4)) + { + sfdp_normalise_ip4_key (session, &skey.key4, i); + sfdp_session_ip46_key_encode (&skey, IP46_TYPE_IP4, &mp->keys[i]); + } + if ((i == 0 && + session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6) || + (i == 1 && + session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6)) + { + sfdp_normalise_ip6_key (session, &skey.key6, i); + sfdp_session_ip46_key_encode (&skey, IP46_TYPE_IP6, &mp->keys[i]); + } + } + vl_api_send_msg (rp, (u8 *) mp); +} + +static void +vl_api_sfdp_session_dump_t_handler (vl_api_sfdp_session_dump_t *mp) +{ + sfdp_main_t *sfdp = &sfdp_main; + sfdp_session_t *session; + uword session_index; + vl_api_registration_t *rp; + rp = vl_api_client_index_to_registration (mp->client_index); + if (rp == 0) + return; + + sfdp_foreach_session (sfdp, session_index, session) + { + sfdp_send_session_details (rp, mp->context, session_index, + session->owning_thread_index, session); + } +} + +static void 
+sfdp_send_tenant_details (vl_api_registration_t *rp, u32 context, + u16 tenant_index, sfdp_tenant_t *tenant) +{ + sfdp_main_t *sfdp = &sfdp_main; + vl_api_sfdp_tenant_details_t *mp; + sfdp_timeout_t *timeout; + + size_t msg_size; + msg_size = sizeof (*mp) + SFDP_MAX_TIMEOUTS * sizeof (mp->timeout[0]); + + mp = vl_msg_api_alloc_zero (msg_size); + mp->_vl_msg_id = ntohs (VL_API_SFDP_TENANT_DETAILS + sfdp->msg_id_base); + + /* fill in the message */ + mp->context = context; + mp->context_id = clib_host_to_net_u32 (tenant->context_id); + mp->index = clib_host_to_net_u32 (tenant_index); + mp->forward_bitmap = + clib_host_to_net_u64 (tenant->bitmaps[SFDP_FLOW_FORWARD]); + mp->reverse_bitmap = + clib_host_to_net_u64 (tenant->bitmaps[SFDP_FLOW_REVERSE]); + mp->n_timeout = clib_host_to_net_u32 (SFDP_MAX_TIMEOUTS); + sfdp_foreach_timeout (sfdp, timeout) + { + u32 idx = timeout - sfdp->timeouts; + mp->timeout[idx] = clib_host_to_net_u32 (tenant->timeouts[idx]); + } + + vl_api_send_msg (rp, (u8 *) mp); +} + +static void +vl_api_sfdp_tenant_dump_t_handler (vl_api_sfdp_tenant_dump_t *mp) +{ + sfdp_main_t *sfdp = &sfdp_main; + sfdp_tenant_t *tenant; + u16 tenant_index; + vl_api_registration_t *rp; + rp = vl_api_client_index_to_registration (mp->client_index); + if (rp == 0) + return; + + pool_foreach_index (tenant_index, sfdp->tenants) + { + tenant = sfdp_tenant_at_index (sfdp, tenant_index); + sfdp_send_tenant_details (rp, mp->context, tenant_index, tenant); + } +} + +#include +static clib_error_t * +sfdp_plugin_api_hookup (vlib_main_t *vm) +{ + sfdp_main_t *sfdp = &sfdp_main; + sfdp->msg_id_base = setup_message_id_table (); + return 0; +} +VLIB_API_INIT_FUNCTION (sfdp_plugin_api_hookup); +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sfdp/callbacks.c b/src/vnet/sfdp/callbacks.c new file mode 100644 index 00000000000..b7eefec16e6 --- /dev/null +++ b/src/vnet/sfdp/callbacks.c @@ -0,0 
+1,19 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include +#include + +sfdp_callback_main_t sfdp_callback_main; + +static clib_error_t * +sfdp_callback_init (vlib_main_t *vm) +{ +#define _(x, ...) SFDP_CALLBACK_BUILD_EFFECTIVE_LIST (x); + foreach_sfdp_callback_type +#undef _ + return 0; +} + +VLIB_INIT_FUNCTION (sfdp_callback_init); \ No newline at end of file diff --git a/src/vnet/sfdp/callbacks.h b/src/vnet/sfdp/callbacks.h new file mode 100644 index 00000000000..a9c5f826f3d --- /dev/null +++ b/src/vnet/sfdp/callbacks.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_callbacks_h +#define __included_callbacks_h +#include + +#define foreach_sfdp_callback_type \ + /* Called by sfdp-lookup after new flows have been created but before \ + * packets have been fully processed. \ + * This gives the opportunity for the SFDP user to initialize \ + * per-flow state or even modify the service chain before it gets used. */ \ + _ (notify_new_sessions, u32, const u32 *, u32) \ + /* Called during pre-input phase to notify that certain flows are being \ + * removed. This gives the opportunity for the SFDP user to reset per-flow \ + * state while no packet is currently being processed by this thread. \ + * This is called before any flow state is removed. */ \ + _ (notify_deleted_sessions, u32, const u32 *, u32) + +#define SFDP_CB_ELT_LIST_TYPE_DECLARE(fn_ptr_type) \ + typedef struct sfdp_cb_elt_list_##fn_ptr_type##_s \ + { \ + struct sfdp_cb_elt_list_##fn_ptr_type##_s *next; \ + fn_ptr_type fun; \ + const char *name; \ + } sfdp_cb_elt_list_##fn_ptr_type##_t; + +#define SFDP_CB_ELT_LIST_TYPE(fn_ptr_type) sfdp_cb_elt_list_##fn_ptr_type##_t + +#define SFDP_CALL_CB_ELT(ptr, x...) 
((ptr)->fun (x)) + +#ifndef CLIB_MARCH_VARIANT +#define SFDP_REGISTER_CALLBACK(type, head, name2) \ + static SFDP_CB_ELT_LIST_TYPE (type) \ + sfdp_callback_registration_##type_##name2; \ + __attribute__ ((__constructor__)) static void \ + __sfdp_callback_add_registration_##type_##name2 (void) \ + \ + { \ + sfdp_callback_main_t *sfdp = &sfdp_callback_main; \ + SFDP_CB_ELT_LIST_TYPE (type) *r = \ + &sfdp_callback_registration_##type_##name2; \ + r->next = sfdp->head; \ + sfdp->head = r; \ + r->name = #name2; \ + } \ + static SFDP_CB_ELT_LIST_TYPE (type) sfdp_callback_registration_##type_##name2 + +#define SFDP_BLACKLIST_CALLBACK(type, head, name) \ + __attribute__ ((__constructor__)) static void \ + __sfdp_callback_blacklist_registration_##type_##name (void) \ + \ + { \ + sfdp_callback_main_t *sfdp = &sfdp_callback_main; \ + vec_add1 (sfdp->blacklist_##head, (const u8 *) #name); \ + } +#else +#define SFDP_REGISTER_CALLBACK(type, head, name2) \ + static SFDP_CB_ELT_LIST_TYPE (type) \ + __clib_unused sfdp_callback_registration_##type_##name2 +#define SFDP_BLACKLIST_CALLBACK(type, head, name) +#endif + +#define _(x, y, z...) typedef y (*sfdp_##x##_cb_t) (z); +foreach_sfdp_callback_type +#undef _ + +#define _(x, ...) SFDP_CB_ELT_LIST_TYPE_DECLARE (sfdp_##x##_cb_t) + foreach_sfdp_callback_type +#undef _ + + typedef struct +{ +#define _(x, ...) 
\ + SFDP_CB_ELT_LIST_TYPE (sfdp_##x##_cb_t) * head_##x; \ + const u8 **blacklist_head_##x; \ + SFDP_CB_ELT_LIST_TYPE (sfdp_##x##_cb_t) * *effective_##x; + foreach_sfdp_callback_type +#undef _ +} sfdp_callback_main_t; + +extern sfdp_callback_main_t sfdp_callback_main; + +#define SFDP_CALLBACK_BUILD_EFFECTIVE_LIST(x) \ + do \ + { \ + typeof (sfdp_callback_main.head_##x) hd = sfdp_callback_main.head_##x; \ + while (hd != 0) \ + { \ + u8 excluded = 0; \ + const u8 **cur; \ + vec_foreach (cur, sfdp_callback_main.blacklist_head_##x) \ + if (!clib_strncmp ((const char *) cur[0], hd->name, 256)) \ + excluded = 1; \ + if (excluded == 0) \ + vec_add1 (sfdp_callback_main.effective_##x, hd); \ + hd = hd->next; \ + } \ + } \ + while (0) + +#define SFDP_CALLBACKS_CALL(x, y...) \ + do \ + { \ + typeof (sfdp_callback_main.effective_##x) elt; \ + vec_foreach (elt, sfdp_callback_main.effective_##x) \ + SFDP_CALL_CB_ELT (elt[0], y); \ + } \ + while (0) + +/* Per callback type specializations */ +#define SFDP_REGISTER_NEW_SESSIONS_CALLBACK(name) \ + SFDP_REGISTER_CALLBACK (sfdp_notify_new_sessions_cb_t, \ + head_notify_new_sessions, name) +#define SFDP_BLACKLIST_NEW_SESSIONS_CALLBACK(name) \ + SFDP_BLACKLIST_CALLBACK (sfdp_notify_new_sessions_cb_t, \ + head_notify_new_sessions, name) + +#define SFDP_REGISTER_DELETED_SESSIONS_CALLBACK(name) \ + SFDP_REGISTER_CALLBACK (sfdp_notify_deleted_sessions_cb_t, \ + head_notify_deleted_sessions, name) +#define SFDP_BLACKLIST_DELETED_SESSIONS_CALLBACK(name) \ + SFDP_BLACKLIST_CALLBACK (sfdp_notify_deleted_sessions_cb_t, \ + head_notify_deleted_sessions, name) +#endif diff --git a/src/vnet/sfdp/cli.c b/src/vnet/sfdp/cli.c new file mode 100644 index 00000000000..b0149bb253b --- /dev/null +++ b/src/vnet/sfdp/cli.c @@ -0,0 +1,551 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include +#include +#include +#include + +/* + * add CLI: + * sfdp tenant + * + * it creates entry in the tenant pool. 
Default service chains in both + * directions is "sfdp-drop" + * + * + * add CLI: + * set sfdp services tenant (SERVICE_NAME)+ + * + * configure tenant with a service chain for a given direction (forward or + * reverse) + * + */ + +static clib_error_t * +sfdp_tenant_add_del_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + u8 is_del = 0; + u32 tenant_id = ~0; + u32 context_id = ~0; + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "add %d", &tenant_id)) + is_del = 0; + else if (unformat (line_input, "del %d", &tenant_id)) + is_del = 1; + else if (unformat (line_input, "context %d", &context_id)) + ; + else + { + err = unformat_parse_error (line_input); + goto done; + } + } + if (tenant_id == ~0) + { + err = clib_error_return (0, "missing tenant id"); + goto done; + } + if (context_id == ~0) + context_id = tenant_id; + err = sfdp_tenant_add_del (sfdp, tenant_id, context_id, is_del); +done: + unformat_free (line_input); + return err; +} + +static clib_error_t * +sfdp_set_services_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = ~0; + sfdp_bitmap_t bitmap = 0; + u8 direction = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "tenant %d", &tenant_id)) + ; + else if (unformat_user (line_input, unformat_sfdp_service_bitmap, + &bitmap)) + ; + else if (unformat (line_input, "forward")) + direction = SFDP_FLOW_FORWARD; + else if (unformat (line_input, "reverse")) + direction = SFDP_FLOW_REVERSE; + 
else + { + err = unformat_parse_error (line_input); + goto done; + } + } + if (tenant_id == ~0) + { + err = clib_error_return (0, "missing tenant id"); + goto done; + } + if (direction == (u8) ~0) + { + err = clib_error_return (0, "missing direction"); + goto done; + } + err = sfdp_set_services (sfdp, tenant_id, bitmap, direction); +done: + unformat_free (line_input); + return err; +} + +static_always_inline u32 +table_format_insert_sfdp_service (table_t *t, + sfdp_service_registration_t *service, u32 n) +{ + table_format_cell (t, n, 0, "%s", service->node_name); + table_set_cell_align (t, n, 0, TTAA_LEFT); + table_format_cell (t, n, 1, "%u", *(service->index_in_bitmap)); + table_set_cell_align (t, n, 1, TTAA_CENTER); + table_format_cell (t, n, 2, "%s", (service->is_terminal) ? "T" : ""); + table_set_cell_align (t, n, 2, TTAA_CENTER); + return n + 1; +} + +static clib_error_t * +sfdp_show_services_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sfdp_service_main_t *vsm = &sfdp_service_main; + sfdp_service_registration_t ***services_for_scope; + + vec_foreach (services_for_scope, vsm->services_per_scope_index) + { + table_t service_table_ = {}, *service_table = &service_table_; + u32 scope_index = services_for_scope - vsm->services_per_scope_index; + sfdp_service_registration_t **service; + table_format_title (service_table, + "Registered SFDP services for scope '%s'", + vsm->scope_names[scope_index]); + table_add_header_col (service_table, 3, "Node name", "Index", + "Terminal"); + + u32 n = 0; + vec_foreach (service, *services_for_scope) + { + n = table_format_insert_sfdp_service (service_table, *service, n); + } + vlib_cli_output (vm, "%U", format_table, service_table); + vlib_cli_output (vm, "%u / 64 registered services", n); + table_free (service_table); + } + return 0; +} + +static clib_error_t * +sfdp_set_timeout_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, 
*line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = ~0; + u32 timeout_idx = ~0; + u32 timeout_val = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "tenant %d", &tenant_id)) + ; + else if (unformat (line_input, "%U %d", unformat_sfdp_timeout_name, + &timeout_idx, &timeout_val)) + ; + else + { + err = unformat_parse_error (line_input); + goto done; + } + } + if (tenant_id == ~0) + { + err = clib_error_return (0, "missing tenant id"); + goto done; + } + if (timeout_idx == ~0) + { + err = clib_error_return (0, "missing timeout"); + goto done; + } + + err = sfdp_set_timeout (sfdp, tenant_id, timeout_idx, timeout_val); +done: + unformat_free (line_input); + return err; +} + +static clib_error_t * +sfdp_set_sp_node_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = ~0; + u32 sp_idx = ~0; + u32 node_index = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "tenant %d", &tenant_id)) + ; + else if (unformat (line_input, "node %U", unformat_vlib_node, vm, + &node_index)) + ; + else if (unformat (line_input, "%U", unformat_sfdp_sp_node, &sp_idx)) + ; + else + { + err = unformat_parse_error (line_input); + goto done; + } + } + if (tenant_id == ~0) + { + err = clib_error_return (0, "missing tenant id"); + goto done; + } + if (node_index == ~0) + { + err = clib_error_return (0, "missing node"); + goto done; + } + if (sp_idx == ~0) + { + err = clib_error_return (0, "missing slow-path"); + goto done; + } + + err = sfdp_set_sp_node (sfdp, tenant_id, sp_idx, node_index); +done: + unformat_free 
(line_input); + return err; +} + +static clib_error_t * +sfdp_set_icmp_error_node_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + u32 tenant_id = ~0; + u32 node_index = ~0; + u8 ip46 = 0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "tenant %d", &tenant_id)) + ; + else if (unformat (line_input, "node %U", unformat_vlib_node, vm, + &node_index)) + ; + else if (unformat (line_input, "ip4")) + ip46 = 1; + else if (unformat (line_input, "ip6")) + ip46 = 2; + else + { + err = unformat_parse_error (line_input); + goto done; + } + } + if (tenant_id == ~0) + { + err = clib_error_return (0, "missing tenant id"); + goto done; + } + if (node_index == ~0) + { + err = clib_error_return (0, "missing node"); + goto done; + } + if (ip46 == 0) + { + err = clib_error_return (0, "missing address family"); + goto done; + } + + err = sfdp_set_icmp_error_node (sfdp, tenant_id, ip46 - 1, node_index); + +done: + unformat_free (line_input); + return err; +} + +static clib_error_t * +sfdp_show_sessions_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + sfdp_session_t *session; + u32 session_index; + sfdp_tenant_t *tenant; + u32 tenant_id = ~0; + u32 max_output_value = 20; + bool is_show_all = false; + f64 now = vlib_time_now (vm); + + if (unformat_user (input, unformat_line_input, line_input)) + { + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "tenant %u", &tenant_id)) + ; + else if (unformat (line_input, "max %u", &max_output_value)) + ; + else if (unformat (line_input, "unsafe-show-all")) + 
is_show_all = true; + else + { + err = unformat_parse_error (line_input); + break; + } + } + unformat_free (line_input); + } + + if (!is_show_all && max_output_value == 0) + err = clib_error_return (0, "Please specify a positive integer for max"); + + if (!err) + { + table_t session_table_ = {}, *session_table = &session_table_; + u32 n = 0; + sfdp_table_format_add_header_col (session_table); + sfdp_foreach_session (sfdp, session_index, session) + { + tenant = sfdp_tenant_at_index (sfdp, session->tenant_idx); + if (tenant_id != ~0 && tenant_id != tenant->tenant_id) + continue; + n = sfdp_table_format_insert_session (session_table, n, session_index, + session, tenant->tenant_id, now); + + if (!is_show_all && n >= max_output_value) + break; + } + vlib_cli_output (vm, "%U", format_table, session_table); + if (n < pool_elts (sfdp->sessions)) + { + vlib_cli_output (vm, "Only %u sessions displayed, %u ignored", n, + pool_elts (sfdp->sessions) - n); + } + + table_free (session_table); + } + + return err; +} + +static clib_error_t * +sfdp_show_session_detail_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + clib_bihash_kv_8_8_t kv = { 0 }; + f64 now = vlib_time_now (vm); + u32 session_index; + u64 session_id; + if (unformat_user (input, unformat_line_input, line_input)) + { + if (unformat_check_input (line_input) == UNFORMAT_END_OF_INPUT || + unformat (line_input, "0x%X", sizeof (session_id), &session_id) == 0) + err = unformat_parse_error (line_input); + unformat_free (line_input); + } + else + err = clib_error_return (0, "No session id provided"); + + if (!err) + { + kv.key = session_id; + if (!clib_bihash_search_inline_8_8 (&sfdp->session_index_by_id, &kv)) + { + session_index = sfdp_session_index_from_lookup (kv.value); + vlib_cli_output (vm, "%U", format_sfdp_session_detail, session_index, + now); + } + else + { + 
err = + clib_error_return (0, "Session id 0x%llx not found", session_id); + } + } + return err; +} + +static clib_error_t * +sfdp_show_tenant_detail_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + sfdp_main_t *sfdp = &sfdp_main; + sfdp_tenant_t *tenant; + u32 tenant_id = ~0; + u16 tenant_idx; + u8 detail = 0; + if (unformat_user (input, unformat_line_input, line_input)) + { + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%d detail", &tenant_id)) + detail = 1; + else if (unformat (line_input, "%d", &tenant_id)) + ; + else + { + err = unformat_parse_error (line_input); + break; + } + } + unformat_free (line_input); + } + if (err) + return err; + + pool_foreach_index (tenant_idx, sfdp->tenants) + { + tenant = sfdp_tenant_at_index (sfdp, tenant_idx); + + if (tenant_id != ~0 && tenant->tenant_id != tenant_id) + continue; + + vlib_cli_output (vm, "Tenant %d", tenant->tenant_id); + vlib_cli_output (vm, " %U", format_sfdp_tenant, sfdp, tenant_idx, + tenant); + if (detail) + vlib_cli_output (vm, " %U", format_sfdp_tenant_extra, sfdp, + tenant_idx, tenant); + } + + return err; +} + +static clib_error_t * +sfdp_show_sfdp_status_command_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + sfdp_main_t *sfdp = &sfdp_main; + u64 max_sessions = sfdp_num_sessions (); + u64 free_sessions = sfdp_remaining_sessions_in_pool (); + u64 active_sessions = sfdp_real_active_sessions (); + f64 active_percent = (((f64) (100)) * active_sessions) / max_sessions; + + vlib_cli_output (vm, "sfdp status:\n"); + vlib_cli_output (vm, " max sessions: %lu\n", max_sessions); + vlib_cli_output (vm, " active sessions: %lu (%.2f%%)\n", active_sessions, + active_percent); + vlib_cli_output (vm, " free sessions: %lu\n", free_sessions); + vlib_cli_output (vm, " eviction sessions margin: %u\n", + 
sfdp->eviction_sessions_margin); + vlib_cli_output (vm, " max sessions cache per thread: %lu\n", + sfdp_num_sessions_cache_per_thread ()); + vlib_cli_output (vm, " max tenants: %llu\n", 1ULL << sfdp->log2_tenants); + + // iterate over all threads + sfdp_per_thread_data_t *ptd; + vec_foreach (ptd, sfdp->per_thread_data) + { + u32 index = ptd - sfdp->per_thread_data; + vlib_cli_output (vm, " [%u] active sessions: %lu\n", index, + ptd->n_sessions); + vlib_cli_output (vm, " [%u] cached sessions: %lu\n", index, + vec_len (ptd->session_freelist)); + } + + return NULL; +} + +VLIB_CLI_COMMAND (sfdp_tenant_add_del_command, static) = { + .path = "sfdp tenant", + .short_help = "sfdp tenant context ", + .function = sfdp_tenant_add_del_command_fn, +}; + +VLIB_CLI_COMMAND (sfdp_set_services_command, static) = { + .path = "set sfdp services", + .short_help = "set sfdp services tenant " + " [SERVICE_NAME]+ ", + .function = sfdp_set_services_command_fn, +}; + +VLIB_CLI_COMMAND (sfdp_show_services_command, static) = { + .path = "show sfdp services", + .short_help = "show sfdp services", + .function = sfdp_show_services_fn, +}; + +VLIB_CLI_COMMAND (show_sfdp_sessions_command, static) = { + .path = "show sfdp session-table", + .short_help = "show sfdp session-table [tenant ] " + "[max ] [unsafe-show-all]", + .function = sfdp_show_sessions_command_fn, +}; + +VLIB_CLI_COMMAND (show_sfdp_detail_command, static) = { + .path = "show sfdp session-detail", + .short_help = "show sfdp session-detail 0x", + .function = sfdp_show_session_detail_command_fn, +}; + +VLIB_CLI_COMMAND (show_sfdp_tenant, static) = { + .path = "show sfdp tenant", + .short_help = "show sfdp tenant [ [detail]]", + .function = sfdp_show_tenant_detail_command_fn, +}; + +VLIB_CLI_COMMAND (sfdp_show_sfdp_status_command, static) = { + .path = "show sfdp status", + .short_help = "show sfdp status", + .function = sfdp_show_sfdp_status_command_fn, +}; + +VLIB_CLI_COMMAND (sfdp_set_timeout_command, static) = { + .path = "set sfdp 
timeout", + .short_help = "set sfdp timeout tenant " + " ", + .function = sfdp_set_timeout_command_fn +}; + +VLIB_CLI_COMMAND (sfdp_set_sp_node_command, static) = { + .path = "set sfdp sp-node", + .short_help = "set sfdp sp-node tenant " + " node ", + .function = sfdp_set_sp_node_command_fn +}; + +VLIB_CLI_COMMAND (sfdp_set_icmp_error_node_command, static) = { + .path = "set sfdp icmp-error-node", + .short_help = "set sfdp icmp-error-node tenant " + " node ", + .function = sfdp_set_icmp_error_node_command_fn +}; diff --git a/src/vnet/sfdp/common.h b/src/vnet/sfdp/common.h new file mode 100644 index 00000000000..53ecb39a0bb --- /dev/null +++ b/src/vnet/sfdp/common.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_sfdp_common_h__ +#define __included_sfdp_common_h__ + +#include + +#define foreach_sfdp_buffer_flag \ + _ (SV_REASSEMBLED, "sv_reassembled") \ + _ (FULL_REASSEMBLED, "full_reassembled") \ + _ (IP6_FINAL_PROTO_VALID, "ip6_final_proto_valid") + +/* Bit position of each SFDP buffer flag. */ +enum +{ +#define _(sym, str) SFDP_BUFFER_FLAG_BIT_##sym, + foreach_sfdp_buffer_flag +#undef _ +}; + +/* Flag masks (1 << bit position) stored in sfdp_buffer_opaque_t::flags. */ +enum +{ +#define _(sym, str) SFDP_BUFFER_FLAG_##sym = 0x1 << SFDP_BUFFER_FLAG_BIT_##sym, + foreach_sfdp_buffer_flag +#undef _ +}; +typedef u64 sfdp_bitmap_t; +typedef u16 session_version_t; +/* Per-buffer SFDP metadata, overlaid on the "unused" area of the vnet buffer + * opaque (see sfdp_buffer () below). + */ +typedef struct +{ + sfdp_bitmap_t service_bitmap; + u16 tenant_index; + session_version_t session_version_before_handoff; + u8 flags; + u8 tcp_flags; + u8 ip6_final_proto; +} __attribute__ ((may_alias)) sfdp_buffer_opaque_t; + +STATIC_ASSERT (sizeof (sfdp_buffer_opaque_t) <= + sizeof (vnet_buffer ((vlib_buffer_t *) 0)->unused), + "size of sfdp_buffer_opaque_t must be <= size of " + "vnet_buffer_opaque_t->unused"); + +#define sfdp_buffer(b) ((sfdp_buffer_opaque_t *) vnet_buffer (b)->unused) + +/* Sometimes an SFDP packet needs to undergo an excursion outside of SFDP (e.g., + * for reassembly).
This is used to save the SFDP metadata during this + * excursion + */ +#define sfdp_buffer2(b) ((sfdp_buffer_opaque_t *) vnet_buffer2 (b)->unused) + +#endif diff --git a/src/vnet/sfdp/drop/node.c b/src/vnet/sfdp/drop/node.c new file mode 100644 index 00000000000..cb41be8cb90 --- /dev/null +++ b/src/vnet/sfdp/drop/node.c @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include +#include +#define foreach_sfdp_drop_error _ (DROP, "drop") + +typedef enum +{ +#define _(sym, str) SFDP_DROP_ERROR_##sym, + foreach_sfdp_drop_error +#undef _ + SFDP_DROP_N_ERROR, +} sfdp_drop_error_t; + +static char *sfdp_drop_error_strings[] = { +#define _(sym, string) string, + foreach_sfdp_drop_error +#undef _ +}; + +#define foreach_sfdp_drop_next _ (DROP, "error-drop") + +typedef enum +{ +#define _(n, x) SFDP_DROP_NEXT_##n, + foreach_sfdp_drop_next +#undef _ + SFDP_DROP_N_NEXT +} sfdp_drop_next_t; + +typedef struct +{ + u32 flow_id; +} sfdp_drop_trace_t; + +static u8 * +format_sfdp_drop_trace (u8 *s, va_list *args) +{ + vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *); + vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *); + sfdp_drop_trace_t *t = va_arg (*args, sfdp_drop_trace_t *); + + s = format (s, "sfdp-drop: flow-id %u (session %u, %s)", t->flow_id, + t->flow_id >> 1, t->flow_id & 0x1 ? 
"reverse" : "forward"); + return s; +} + +/* sfdp-drop node: hands the whole frame to "error-drop", bumps the DROP + * counter by the frame size and, when tracing is on, records the flow id of + * every traced buffer. + */ +VLIB_NODE_FN (sfdp_drop_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + + vlib_buffer_enqueue_to_single_next (vm, node, from, SFDP_DROP_NEXT_DROP, + n_left); + vlib_node_increment_counter (vm, node->node_index, SFDP_DROP_ERROR_DROP, + n_left); + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + u32 i; + vlib_get_buffers (vm, from, bufs, n_left); + b = bufs; + /* Walk the whole frame: a traced buffer may follow an untraced one, + * so the scan must not stop at the first untraced buffer. */ + for (i = 0; i < n_left; i++, b++) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + sfdp_drop_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->flow_id = b[0]->flow_id; + } + } + } + return frame->n_vectors; +} + +VLIB_REGISTER_NODE (sfdp_drop_node) = { + .name = "sfdp-drop", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_drop_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_drop_error_strings), + .error_strings = sfdp_drop_error_strings, + + .n_next_nodes = SFDP_DROP_N_NEXT, + .next_nodes = { +#define _(n, x) [SFDP_DROP_NEXT_##n] = x, + foreach_sfdp_drop_next +#undef _ + } + +}; + +SFDP_SERVICE_DEFINE (drop) = { .node_name = "sfdp-drop", + .runs_before = SFDP_SERVICES (0), + .runs_after = SFDP_SERVICES (0), + .is_terminal = 1 }; \ No newline at end of file diff --git a/src/vnet/sfdp/expiry/expiry.c b/src/vnet/sfdp/expiry/expiry.c new file mode 100644 index 00000000000..2d8676e6d67 --- /dev/null +++ b/src/vnet/sfdp/expiry/expiry.c @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#include + +#include +#include + +/* Non-zero between the enable and disable calls of sfdp_enable_disable_expiry; + * callbacks and timeouts may only be (re)configured while it is zero. */ +static u8 expiry_is_enabled = 0; + +/* Copies the expiry callbacks into sfdp_main. + * Returns 0 on success, -1 when expiry is already enabled. */ +int +sfdp_set_expiry_callbacks (const sfdp_expiry_callbacks_t *callbacks) +{ + sfdp_main_t *sfdp = &sfdp_main; + if (expiry_is_enabled) + { + return -1; + } + clib_memcpy (&sfdp->expiry_callbacks, callbacks, sizeof (*callbacks)); + return 0; +} + +/* Resets the timeout table and copies the n provided entries into it. + * Returns 0 on success, -1 when expiry is already enabled or when n exceeds + * the capacity of the timeout table. */ +int +sfdp_init_timeouts (const sfdp_timeout_t *timeouts, u32 n) +{ + sfdp_main_t *sfdp = &sfdp_main; + if (expiry_is_enabled) + { + return -1; + } + /* Refuse more entries than the fixed-size timeout table can hold, + * otherwise the copy below would write past the end of the table. */ + if (n > ARRAY_LEN (sfdp->timeouts)) + { + return -1; + } + clib_memset (sfdp->timeouts, 0, sizeof (sfdp->timeouts)); + clib_memcpy (sfdp->timeouts, timeouts, sizeof (*timeouts) * n); + return 0; +} + +/* Sets the "sfdp-expire" node to disabled or polling state on every vlib + * main; the one at index 0 is skipped when skip_main is non-zero. */ +void +sfdp_enable_disable_expiry_node (u8 is_disable, int skip_main) +{ + u32 n_vms = vlib_num_workers () + 1; + for (int i = !!skip_main; i < n_vms; i++) + { + vlib_main_t *vm = vlib_get_main_by_index (i); + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) "sfdp-expire"); + vlib_node_set_state (vm, node->index, + is_disable ? VLIB_NODE_STATE_DISABLED : + VLIB_NODE_STATE_POLLING); + } +} + +void +sfdp_enable_disable_expiry (u8 is_disable) +{ + sfdp_main_t *sfdp = &sfdp_main; + + if (!is_disable) + { /* Init module first */ + expiry_is_enabled = true; + sfdp->expiry_callbacks.enable (); + } + + /* Start/stop pre-input node */ + sfdp_enable_disable_expiry_node (is_disable, sfdp->no_main); + + if (is_disable) + { /* De-init module last */ + sfdp->expiry_callbacks.disable (); + expiry_is_enabled = false; + } +} + +#define foreach_sfdp_expire_error \ + _ (NODE_CALLED, "node-called", INFO, "node called") \ + _ (EXPIRED, "expired", INFO, "session expired") \ + _ (REQUESTED_EVICTION, "requested-eviction", INFO, "requested eviction") + +typedef enum +{ +#define _(sym, name, sev, str) SFDP_EXPIRE_ERROR_##sym, + foreach_sfdp_expire_error +#undef _ + SFDP_EXPIRE_N_ERROR, +} sfdp_expire_error_t; + +static vlib_error_desc_t sfdp_expire_error_descriptors[] = { +#define _(sym, name, sev, str) { name, str, VL_COUNTER_SEVERITY_##sev }, + foreach_sfdp_expire_error +#undef _ 
+}; + +VLIB_NODE_FN (sfdp_expire_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 thread_index = vm->thread_index; + sfdp_per_thread_data_t *ptd = + vec_elt_at_index (sfdp->per_thread_data, thread_index); + u32 *session_index; + + u32 n_remaining_sessions = sfdp_sessions_available_for_this_thread (ptd); + u32 desired_evictions = + (n_remaining_sessions < sfdp->eviction_sessions_margin) ? + (sfdp->eviction_sessions_margin - n_remaining_sessions) : + 0; + + /* Calling callback for expiries or evictions */ + ptd->expired_sessions = sfdp->expiry_callbacks.expire_or_evict_sessions ( + desired_evictions, ptd->expired_sessions); + + vlib_node_increment_counter (vm, node->node_index, + SFDP_EXPIRE_ERROR_NODE_CALLED, 1); + vlib_node_increment_counter (vm, node->node_index, + SFDP_EXPIRE_ERROR_REQUESTED_EVICTION, + desired_evictions); + + if (vec_len (ptd->expired_sessions) == 0) + { + return 0; + } + + sfdp_notify_deleted_sessions (sfdp, ptd->expired_sessions, + vec_len (ptd->expired_sessions)); + + vec_foreach (session_index, ptd->expired_sessions) + { + sfdp_session_t *session = sfdp_session_at_index (*session_index); + sfdp_session_remove (sfdp, ptd, session, thread_index, *session_index); + } + + vlib_node_increment_counter (vm, node->node_index, SFDP_EXPIRE_ERROR_EXPIRED, + vec_len (ptd->expired_sessions)); + vec_reset_length (ptd->expired_sessions); + + /* TODO: some logic so that we are not called too often */ + return 0; +} + +clib_error_t * +sfdp_set_eviction_sessions_margin (u32 margin) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 max = sfdp_num_sessions () / 2; + if (margin == ~0) + { + margin = SFDP_DEFAULT_EVICTION_SESSIONS_MARGIN; + margin = (margin > max) ? 
max : margin; + } + + if (margin > max) + { + return clib_error_return ( + 0, "Cannot set a margin greater than half the flow table !"); + } + + sfdp->eviction_sessions_margin = margin; + return 0; +} + +VLIB_REGISTER_NODE (sfdp_expire_node) = { + .name = "sfdp-expire", + .type = VLIB_NODE_TYPE_INPUT, + .n_errors = SFDP_EXPIRE_N_ERROR, + .error_counters = sfdp_expire_error_descriptors, + .state = VLIB_NODE_STATE_DISABLED +}; diff --git a/src/vnet/sfdp/expiry/expiry.h b/src/vnet/sfdp/expiry/expiry.h new file mode 100644 index 00000000000..1a0b052e716 --- /dev/null +++ b/src/vnet/sfdp/expiry/expiry.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_sfdp_expiry_h__ +#define __included_sfdp_expiry_h__ +#include +#include + +/* Default margin before eviction is requested to expiry module. */ +#define SFDP_DEFAULT_EVICTION_SESSIONS_MARGIN (256 * 256) /* 256 vectors */ + +/* Defined in sfdp.h, but needed in callback functions definitions */ +typedef struct sfdp_session sfdp_session_t; +typedef struct sfdp_timeout sfdp_timeout_t; + +/* Defines callbacks used by sfdp to call expiry module. */ +typedef struct +{ + /* Called by sfdp when it's being enabled. + * The expiry module shouldn't do anything before this is called. */ + void (*enable) (); + + /* Called by sfdp when it's being disabled. + * The expiry module shouldn't do anything after this is called. */ + void (*disable) (); + + /* Called by sfdp on every pre-input step, on every worker thread. + * Provides an opportunity for the session expiry module to timeout flows, + * but also for sfdp to request a specific number of flows to be evicted. + * This is best-effort, and the module could return less than the number + * of evicted sessions. + * desired_expiries: number of requested flow expiries to be added to the + * vector. expired_sessions_vec: vec pointer to be filled with expired + * sessions. 
return: updated expired_sessions_vec (resize may change the + * vector pointer value). The expiry module may add fewer, or more, sessions + * than the requested number. + * + * Note: Upon placing a session index in expired_sessions_vec, the expiry + * module shall have freed any associated resources, as sfdp will free + * it definitely. + */ + u32 *(*expire_or_evict_sessions) (u32 desired_expiries, + u32 *expired_sessions_vec); + + /* Called by sfdp-lookup after a new session entry is created, + * but before the first packet gets processed with it. + * This gives the opportunity for the session expiry module to initialize + * per-flow state before the packet is processed by any service. */ + void (*notify_new_sessions) (const u32 *new_sessions, u32 len); + + /* Shall return the flow's remaining time to live. + * Used by CLI table dump and API. */ + f64 (*session_remaining_time) (sfdp_session_t *session, f64 now); + + /* Shall format the session expiry information details. + * The variadic arguments used are: + * - sfdp_session_t *session + * - f64 now + * Note: If printed on more than one line, use provided indentation. + */ + u8 *(*format_session_details) (u8 *s, va_list *args); + +} sfdp_expiry_callbacks_t; + +/* Check that sfdp_session_t::expiry_opaque holds expiry module data. */ +#define SFDP_EXPIRY_STATIC_ASSERT_FITS_IN_EXPIRY_OPAQUE(type) \ + STATIC_ASSERT (sizeof (type) <= \ + sizeof (((sfdp_session_t *) (0))->expiry_opaque), \ + #type " too big to fit in expiry_opaque"); + +/* Casts sfdp_session_t::expiry_opaque into provided type. */ +#define SFDP_EXPIRY_SESSION(session, type) \ + ((type *) (sfdp_get_session_expiry_opaque (session))) + +/** Sets the expiry callbacks. + * + * Returns 0 upon success, or a different value if called while sfdp is + * already enabled. + */ +int sfdp_set_expiry_callbacks (const sfdp_expiry_callbacks_t *callbacks); + +/** Provides initial timeout names and defaults to sfdp-core.
+ * + * Returns 0 upon success, or a different value if called while sfdp is + * already enabled. + */ +int sfdp_init_timeouts (const sfdp_timeout_t *timeouts, u32 n); + +/** Called by sfdp when enabling/disabling expiry. */ +void sfdp_enable_disable_expiry (u8 is_disable); + +/** Called by sfdp_enable_disable_expiry to set the sfdp-expire input node + * to disabled or polling state. When skip_main is non-zero, the vlib main + * at index 0 is left untouched. */ +void sfdp_enable_disable_expiry_node (u8 is_disable, int skip_main); + +/** Sets the sessions-count margin used to enable flow eviction + * + * Once the number of remaining available sessions passes below the margin, + * the expiry module will be asked to remove existing sessions. + * + * The value used depends on the expiry module implementation. If the + * expiry module can synchronously delete all the needed sessions, then + * the value shall be equal to the maximum number of new sessions that + * can be processed in a single VPP loop. + * Otherwise, a greater value shall be used, as to leave enough time + * for the expiry module to evict flows without taking a risk to run out + * of flow entries. + * + * This function accepts ~0, which will set the margin to a default value. + */ +clib_error_t *sfdp_set_eviction_sessions_margin (u32 margin); + +void sfdp_check_eviction_sessions_margin (); + +#endif /* __included_sfdp_expiry_h__ */ diff --git a/src/vnet/sfdp/expiry/expiry_cli.c b/src/vnet/sfdp/expiry/expiry_cli.c new file mode 100644 index 00000000000..9c6ae152edc --- /dev/null +++ b/src/vnet/sfdp/expiry/expiry_cli.c @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#include + +#include + +static clib_error_t * +sfdp_set_eviction_sessions_margin_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + unformat_input_t line_input_, *line_input = &line_input_; + clib_error_t *err = 0; + u32 eviction_sessions_margin = ~0; + + if (!unformat_user (input, unformat_line_input, line_input)) + return 0; + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "%u", &eviction_sessions_margin)) + ; + else + { + err = unformat_parse_error (line_input); + unformat_free (line_input); + return err; + } + } + unformat_free (line_input); + + if (eviction_sessions_margin == ~0) + { + return clib_error_return (0, "Missing margin value"); + } + else if ((err = sfdp_set_eviction_sessions_margin ( + eviction_sessions_margin)) != NULL) + { + return err; + } + + return 0; +} + +VLIB_CLI_COMMAND (set_eviction_sessions_margin, static) = { + .path = "set sfdp eviction sessions-margin", + .short_help = "set sfdp eviction sessions-margin ", + .function = sfdp_set_eviction_sessions_margin_fn +}; + +static clib_error_t * +test_sfdp_expiry_disable_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + (void) vm; + (void) input; + (void) cmd; + sfdp_enable_disable_expiry_node (true /* is_disable */, + false /* skip main*/); + return NULL; +} + +/** Function used to force disable expiry in tests. */ +VLIB_CLI_COMMAND (test_sfdp_expiry_disable, static) = { + .path = "test sfdp expiry disable", + .short_help = "[TEST ONLY] disable sfdp-expiry node", + .function = test_sfdp_expiry_disable_fn +}; + +static clib_error_t * +test_sfdp_expiry_enable_fn (vlib_main_t *vm, unformat_input_t *input, + vlib_cli_command_t *cmd) +{ + (void) vm; + (void) input; + (void) cmd; + sfdp_enable_disable_expiry_node (false /* is_disable */, + false /* skip main*/); + return NULL; +} + +/** Function used to enable-back expiry in tests. 
*/ +VLIB_CLI_COMMAND (test_sfdp_expiry_enable, static) = { + .path = "test sfdp expiry enable", + .short_help = "[TEST ONLY] enable sfdp-expiry node", + .function = test_sfdp_expiry_enable_fn +}; diff --git a/src/vnet/sfdp/format.c b/src/vnet/sfdp/format.c new file mode 100644 index 00000000000..4891741974c --- /dev/null +++ b/src/vnet/sfdp/format.c @@ -0,0 +1,478 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include +u8 * +format_sfdp_session_state (u8 *s, va_list *args) +{ + u8 session_state = va_arg (*args, u32); +#define _(n, str) \ + if (session_state == SFDP_SESSION_STATE_##n) \ + s = format (s, "%s", (str)); + foreach_sfdp_session_state +#undef _ + return s; +} + +u8 * +format_sfdp_session_type (u8 *s, va_list *args) +{ + sfdp_parser_main_t *pm = &sfdp_parser_main; + sfdp_parser_data_t *pdata; + const char *parser_name; + u32 session_type = va_arg (*args, u32); + u32 parser_index = va_arg (*args, u32); + if (session_type == SFDP_SESSION_TYPE_IP4) + s = format (s, "ipv4"); + else if (session_type == SFDP_SESSION_TYPE_IP6) + s = format (s, "ipv6"); + else if (session_type == SFDP_SESSION_TYPE_USER) + { + pdata = vec_elt_at_index (pm->parsers, parser_index); + parser_name = pdata->name; + s = format (s, "custom-parser: %s", parser_name); + } + return s; +} + +u8 * +format_sfdp_ipv4_context_id (u8 *s, va_list *args) +{ + sfdp_session_ip4_key_t *k = va_arg (*args, sfdp_session_ip4_key_t *); + s = format (s, "%d", k->context_id); + return s; +} + +u8 * +format_sfdp_ipv4_ingress (u8 *s, va_list *args) +{ + sfdp_session_ip4_key_t *k = va_arg (*args, sfdp_session_ip4_key_t *); + s = format (s, "%U:%u", format_ip4_address, &k->ip4_key.ip_addr_lo, + k->ip4_key.port_lo); + return s; +} + +u8 * +format_sfdp_ipv4_egress (u8 *s, va_list *args) +{ + sfdp_session_ip4_key_t *k = va_arg (*args, sfdp_session_ip4_key_t *); + s = format (s, "%U:%u", format_ip4_address, 
&k->ip4_key.ip_addr_hi, + k->ip4_key.port_hi); + return s; +} + +u8 * +format_sfdp_ipv6_context_id (u8 *s, va_list *args) +{ + sfdp_session_ip6_key_t *k = va_arg (*args, sfdp_session_ip6_key_t *); + s = format (s, "%d", k->context_id); + return s; +} + +u8 * +format_sfdp_ipv6_ingress (u8 *s, va_list *args) +{ + sfdp_session_ip6_key_t *k = va_arg (*args, sfdp_session_ip6_key_t *); + s = format (s, "%U:%u", format_ip6_address, &k->ip6_key.ip6_addr_lo, + k->ip6_key.port_lo); + return s; +} + +u8 * +format_sfdp_ipv6_egress (u8 *s, va_list *args) +{ + sfdp_session_ip6_key_t *k = va_arg (*args, sfdp_session_ip6_key_t *); + s = format (s, "%U:%u", format_ip6_address, &k->ip6_key.ip6_addr_hi, + k->ip6_key.port_hi); + return s; +} + +void +sfdp_table_format_add_header_col (table_t *session_table) +{ + table_add_header_col (session_table, 11, "id", "tenant", "thread", "index", + "type", "proto", "context", "ingress", "egress", + "state", "TTL(s)"); +} + +u32 +sfdp_table_format_insert_session (table_t *t, u32 n, u32 session_index, + sfdp_session_t *session, u32 tenant_id, + f64 now) +{ + u64 session_net = clib_host_to_net_u64 (session->session_id); + sfdp_session_ip46_key_t skey = {}; + __clib_aligned (CLIB_CACHE_LINE_BYTES) + u8 kdata[SFDP_PARSER_MAX_KEY_SIZE]; + sfdp_parser_main_t *pm = &sfdp_parser_main; + sfdp_parser_data_t *parser; + /* Session id */ + table_format_cell (t, n, 0, "0x%U", format_hex_bytes, &session_net, + sizeof (session_net)); + /* Tenant id */ + table_format_cell (t, n, 1, "%d", tenant_id); + /* Owning thread */ + table_format_cell (t, n, 2, "%d", session->owning_thread_index); + /* Session index */ + table_format_cell (t, n, 3, "%d", session_index); + /* Session type */ + table_format_cell (t, n, 4, "%U", format_sfdp_session_type, session->type, + session->parser_index[SFDP_SESSION_KEY_PRIMARY]); + /* Protocol */ + table_format_cell (t, n, 5, "%U", format_ip_protocol, session->proto); + /* Session state */ + table_format_cell (t, n, 9, "%U", 
format_sfdp_session_state, session->state); + /* Remaining time */ + table_format_cell ( + t, n, 10, "%f", + sfdp_main.expiry_callbacks.session_remaining_time (session, now)); + + if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4) + { + sfdp_normalise_ip4_key (session, &skey.key4, SFDP_SESSION_KEY_PRIMARY); + table_format_cell (t, n, 6, "%U", format_sfdp_ipv4_context_id, + &skey.key4); + table_format_cell (t, n, 7, "%U", format_sfdp_ipv4_ingress, &skey.key4); + table_format_cell (t, n, 8, "%U", format_sfdp_ipv4_egress, &skey.key4); + } + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6) + { + sfdp_normalise_ip6_key (session, &skey.key6, SFDP_SESSION_KEY_PRIMARY); + table_format_cell (t, n, 6, "%U", format_sfdp_ipv6_context_id, + &skey.key6); + table_format_cell (t, n, 7, "%U", format_sfdp_ipv6_ingress, &skey.key6); + table_format_cell (t, n, 8, "%U", format_sfdp_ipv6_egress, &skey.key6); + } + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER) + { + parser = vec_elt_at_index ( + pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]); + parser->normalize_key_fn (session, kdata, SFDP_SESSION_KEY_PRIMARY); + table_format_cell ( + t, n, 6, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_CONTEXT], + kdata); + table_format_cell ( + t, n, 7, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_INGRESS], + kdata); + table_format_cell (t, n, 8, "%U", + parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_EGRESS], + kdata); + } + n += 1; + if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4) + { + sfdp_normalise_ip4_key (session, &skey.key4, SFDP_SESSION_KEY_SECONDARY); + table_format_cell (t, n, 6, "%U", format_sfdp_ipv4_context_id, + &skey.key4); + table_format_cell (t, n, 7, "%U", format_sfdp_ipv4_ingress, &skey.key4); + table_format_cell (t, n, 8, "%U", format_sfdp_ipv4_egress, &skey.key4); + n += 1; + } + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6) + { + 
sfdp_normalise_ip6_key (session, &skey.key6, SFDP_SESSION_KEY_SECONDARY); + table_format_cell (t, n, 6, "%U", format_sfdp_ipv6_context_id, + &skey.key6); + table_format_cell (t, n, 7, "%U", format_sfdp_ipv6_ingress, &skey.key6); + table_format_cell (t, n, 8, "%U", format_sfdp_ipv6_egress, &skey.key6); + n += 1; + } + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER) + { + parser = vec_elt_at_index ( + pm->parsers, session->parser_index[SFDP_SESSION_KEY_SECONDARY]); + parser->normalize_key_fn (session, kdata, SFDP_SESSION_KEY_SECONDARY); + table_format_cell ( + t, n, 6, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_CONTEXT], + kdata); + table_format_cell ( + t, n, 7, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_INGRESS], + kdata); + table_format_cell (t, n, 8, "%U", + parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_EGRESS], + kdata); + n += 1; + } + return n; +} + +u8 * +format_sfdp_scope (u8 *s, va_list *args) +{ + u32 scope_index = va_arg (*args, u32); + sfdp_service_main_t *sm = &sfdp_service_main; + + return format (s, "%s", sm->scope_names[scope_index]); +} + +u8 * +format_sfdp_bitmap (u8 *s, va_list *args) +{ + u32 scope_index = va_arg (*args, u32); + sfdp_bitmap_t bmp = va_arg (*args, sfdp_bitmap_t); + sfdp_service_main_t *sm = &sfdp_service_main; + sfdp_service_registration_t **services = + vec_elt_at_index (sm->services_per_scope_index, scope_index)[0]; + int i; + for (i = 0; i < vec_len (services); i++) + if (bmp & services[i]->service_mask[0]) + s = format (s, "%s,", services[i]->node_name); + return s; +} + +u8 * +format_sfdp_session_detail (u8 *s, va_list *args) +{ + u32 session_index = va_arg (*args, u32); + f64 now = va_arg (*args, f64); + sfdp_session_t *session = sfdp_session_at_index (session_index); + u32 scope_index = session->scope_index; + + u64 session_net = clib_host_to_net_u64 (session->session_id); + vlib_counter_t fctr, bctr; + uword thread_index = session->owning_thread_index; + sfdp_session_ip46_key_t 
skey = {}; + __clib_aligned (CLIB_CACHE_LINE_BYTES) + u8 kdata[SFDP_PARSER_MAX_KEY_SIZE]; + sfdp_parser_main_t *pm = &sfdp_parser_main; + sfdp_parser_data_t *parser = 0; + + vlib_get_combined_counter ( + &sfdp_main.per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP], session_index << 1, + &fctr); + vlib_get_combined_counter ( + &sfdp_main.per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP], + (session_index << 1) | 0x1, &bctr); + /* TODO: deal with secondary keys */ + if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4) + sfdp_normalise_ip4_key (session, &skey.key4, SFDP_SESSION_KEY_PRIMARY); + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6) + sfdp_normalise_ip6_key (session, &skey.key6, SFDP_SESSION_KEY_PRIMARY); + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER) + { + parser = vec_elt_at_index ( + pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]); + parser->normalize_key_fn (session, kdata, SFDP_SESSION_KEY_PRIMARY); + } + + s = format (s, " session id: 0x%U\n", format_hex_bytes, &session_net, + sizeof (u64)); + s = format (s, " thread index: %d\n", + (thread_index == SFDP_UNBOUND_THREAD_INDEX) ? 
-1 : thread_index); + s = format (s, " session index: %d\n", session_index); + + /* Each key formatter below prints "address:port" on its own and consumes + * exactly one argument, so the format string carries one %U per formatter + * and no extra ":%u" (which would misalign the variadic arguments). */ + if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4) + s = format (s, " specification: %U\t%U\t-> %U\n", + format_ip_protocol, session->proto, format_sfdp_ipv4_ingress, + &skey.key4, format_sfdp_ipv4_egress, &skey.key4); + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6) + s = format (s, " specification: %U\t%U\t-> %U\n", + format_ip_protocol, session->proto, format_sfdp_ipv6_ingress, + &skey.key6, format_sfdp_ipv6_egress, &skey.key6); + else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER) + s = format (s, " specification: %U\t%U\t-> %U\n", + format_ip_protocol, session->proto, + parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_INGRESS], kdata, + parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_EGRESS], kdata); + + s = format (s, " state: %U\n", format_sfdp_session_state, session->state); + s = format (s, " %U\n", sfdp_main.expiry_callbacks.format_session_details, + session, now); + s = format (s, " forward service chain: %U\n", format_sfdp_bitmap, + scope_index, session->bitmaps[SFDP_FLOW_FORWARD]); + s = format (s, " reverse service chain: %U\n", format_sfdp_bitmap, + scope_index, session->bitmaps[SFDP_FLOW_REVERSE]); + s = format (s, " counters:\n"); + s = format (s, " forward flow:\n"); + s = format (s, " bytes: %llu\n", fctr.bytes); + s = format (s, " packets: %llu\n", fctr.packets); + s = format (s, " reverse flow:\n"); + s = format (s, " bytes: %llu\n", bctr.bytes); + s = format (s, " packets: %llu\n", bctr.packets); + return s; +} + +u8 * +format_sfdp_tenant (u8 *s, va_list *args) +{ + + u32 indent = format_get_indent (s); + __clib_unused sfdp_main_t *sfdp = va_arg (*args, sfdp_main_t *); + u32 tenant_idx = va_arg (*args, u32); + sfdp_tenant_t *tenant = va_arg (*args, sfdp_tenant_t *); + u32 scope_index; + s = format (s, "index: %d\n", tenant_idx); + s = format (s, "%Ucontext: %d\n", format_white_space, indent, +
tenant->context_id); + foreach_sfdp_scope_index (scope_index) + { + s = format (s, "%Uscope: %U\n", format_white_space, indent, + format_sfdp_scope, scope_index); + s = + format (s, "%Uforward service chain:\n", format_white_space, indent + 2); + s = + format (s, "%U%U\n", format_white_space, indent + 4, format_sfdp_bitmap, + scope_index, tenant->bitmaps[SFDP_FLOW_FORWARD]); + s = + format (s, "%Ureverse service chain:\n", format_white_space, indent + 2); + s = + format (s, "%U%U\n", format_white_space, indent + 4, format_sfdp_bitmap, + scope_index, tenant->bitmaps[SFDP_FLOW_REVERSE]); + } + return s; +} + +u8 * +format_sfdp_tenant_extra (u8 *s, va_list *args) +{ + u32 indent = format_get_indent (s); + sfdp_main_t *sfdp = va_arg (*args, sfdp_main_t *); + vlib_main_t *vm = vlib_get_main (); + u32 tenant_idx = va_arg (*args, u32); + __clib_unused sfdp_tenant_t *tenant = va_arg (*args, sfdp_tenant_t *); + sfdp_timeout_t *timeout; + counter_t ctr; + vlib_counter_t ctr2; + s = format (s, "%s\n", "Counters:"); + +#define _(x, y, z) \ + ctr = vlib_get_simple_counter ( \ + &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x], tenant_idx); \ + s = format (s, "%U%s: %llu\n", format_white_space, indent + 2, z, ctr); + foreach_sfdp_tenant_session_counter +#undef _ +#define _(x, y, z) \ + vlib_get_combined_counter ( \ + &sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x], tenant_idx, &ctr2); \ + s = format (s, "%U%s: %llu packets\n", format_white_space, indent + 2, z, \ + ctr2.packets); \ + s = format (s, "%U %llu bytes\n", format_white_space, \ + indent + strlen (z) + 2, ctr2.bytes); + foreach_sfdp_tenant_data_counter +#undef _ + s = format (s, "%U%s\n", format_white_space, indent, + "Configured Timeout:"); + + sfdp_foreach_timeout (sfdp, timeout) + { + u32 idx = timeout - sfdp->timeouts; + if ((timeout->name != NULL) && strlen (timeout->name)) + { + s = format (s, "%U%s: %d seconds\n", format_white_space, indent + 2, + timeout->name, tenant->timeouts[idx]); + } + } + + 
s = format (s, "%U%s\n", format_white_space, indent, + "Configured Slowpath nodes:"); +#define _(sym, default, name) \ + s = format (s, "%U%s: %U\n", format_white_space, indent + 2, name, \ + format_vlib_node_name, vm, \ + tenant->sp_node_indices[SFDP_SP_NODE_##sym]); + foreach_sfdp_sp_node +#undef _ + return s; +} + +u8 * +format_sfdp_sp_node (u8 *s, va_list *args) +{ + u32 sp_index = va_arg (*args, u32); +#define _(sym, default, name) \ + if (sp_index == SFDP_SP_NODE_##sym) \ + s = format (s, name); + foreach_sfdp_sp_node +#undef _ + return s; +} + +uword +unformat_sfdp_service (unformat_input_t *input, va_list *args) +{ + sfdp_service_main_t *sm = &sfdp_service_main; + u32 *result = va_arg (*args, u32 *); + int i; + for (u32 scope_index = 0; scope_index < sm->n_scopes; scope_index++) + for (i = 0; i < vec_len (sm->services_per_scope_index[scope_index]); i++) + { + sfdp_service_registration_t *reg = + vec_elt_at_index (sm->services_per_scope_index[scope_index], i)[0]; + if (unformat (input, reg->node_name)) + { + *result = reg->index_in_bitmap[0]; + return 1; + } + } + return 0; +} + +uword +unformat_sfdp_service_bitmap (unformat_input_t *input, va_list *args) +{ + sfdp_bitmap_t *result = va_arg (*args, sfdp_bitmap_t *); + int i = -1; + sfdp_bitmap_t bitmap = 0; + while (unformat_user (input, unformat_sfdp_service, &i)) + bitmap |= 1ULL << i; + if (i > -1) + { + *result = bitmap; + return 1; + } + return 0; +} + +uword +unformat_sfdp_scope_name (unformat_input_t *input, va_list *args) +{ + u32 *result = va_arg (*args, u32 *); + sfdp_service_main_t *sm = &sfdp_service_main; + u32 scope_index; + for (scope_index = 0; scope_index < sm->n_scopes; scope_index++) + if (unformat (input, sm->scope_names[scope_index])) + { + *result = scope_index; + return 1; + } + + return 0; +} + +uword +unformat_sfdp_sp_node (unformat_input_t *input, va_list *args) +{ + u32 *result = va_arg (*args, u32 *); +#define _(sym, default, str) \ + if (unformat (input, str)) \ + { \ + *result = 
SFDP_SP_NODE_##sym; \ + return 1; \ + } + foreach_sfdp_sp_node +#undef _ + return 0; +} + +uword +unformat_sfdp_timeout_name (unformat_input_t *input, va_list *args) +{ + u32 *result = va_arg (*args, u32 *); + sfdp_main_t *sfdp = &sfdp_main; + sfdp_timeout_t *timeout; + sfdp_foreach_timeout (sfdp, timeout) + { + if ((timeout->name != NULL) && strlen (timeout->name) && + unformat (input, timeout->name)) + { + *result = timeout - sfdp->timeouts; + return 1; + } + } + return 0; +} diff --git a/src/vnet/sfdp/lookup/full_reass_node.c b/src/vnet/sfdp/lookup/full_reass_node.c new file mode 100644 index 00000000000..bbd3e208fdd --- /dev/null +++ b/src/vnet/sfdp/lookup/full_reass_node.c @@ -0,0 +1,155 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +typedef struct +{ +} sfdp_lookup_sp_full_reass_trace_t; + +static u8 * +format_sfdp_lookup_sp_full_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + vlib_node_t *node = va_arg (*args, vlib_node_t *); + CLIB_UNUSED (sfdp_lookup_sp_full_reass_trace_t * t) = + va_arg (*args, sfdp_lookup_sp_full_reass_trace_t *); + s = format (s, "%v: sent to full reass node", node->name); + + return s; +} + +#define foreach_sfdp_lookup_sp_full_reass_next \ + _ (IP4, "ip4-full-reassembly-custom-context") \ + _ (IP6, "ip6-full-reassembly-custom-context") + +enum +{ +#define _(sym, str) SFDP_LOOKUP_SP_FULL_REASS_NEXT_##sym, + foreach_sfdp_lookup_sp_full_reass_next +#undef _ + SFDP_LOOKUP_SP_FULL_REASS_N_NEXT +}; + +#define foreach_sfdp_lookup_sp_full_reass_error _ (NOERROR, "No error") + +typedef enum +{ +#define _(sym, str) SFDP_LOOKUP_SP_FULL_REASS_ERROR_##sym, + SFDP_LOOKUP_SP_FULL_REASS_N_ERROR +#undef _ +} sfdp_lookup_sp_full_reass_error_t; + +static char *sfdp_lookup_sp_full_reass_error_strings[] = { +#define _(sym, str) str, + foreach_sfdp_lookup_sp_full_reass_error +#undef _ +}; + +static_always_inline u32 +sfdp_lookup_sp_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_ip6) +{ + sfdp_reass_main_t *vrm = &sfdp_reass_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 aux_data[VLIB_FRAME_SIZE], *a; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + + vlib_get_buffers (vm, from, bufs, n_left); + b = bufs; + a = aux_data; + + // TODO: prefetch + 4-loop + while (n_left) + { + a[0] = b[0]->flow_id; + if (PREDICT_FALSE (b[0]->flags & VLIB_BUFFER_IS_TRACED)) + { + sfdp_lookup_sp_full_reass_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + } + + /* Save the tenant index */ + sfdp_buffer2 (b[0])->tenant_index = sfdp_buffer (b[0])->tenant_index; + sfdp_buffer2 (b[0])->flags = 
SFDP_BUFFER_FLAG_FULL_REASSEMBLED; + + vnet_buffer (b[0])->ip.reass.next_index = + is_ip6 ? vrm->ip6_full_reass_next_index : + vrm->ip4_full_reass_next_index; + vnet_buffer (b[0])->ip.reass.error_next_index = + is_ip6 ? vrm->ip6_full_reass_err_next_index : + vrm->ip4_full_reass_err_next_index; + b += 1; + a += 1; + n_left -= 1; + } + + vlib_buffer_enqueue_to_single_next_with_aux ( + vm, node, from, aux_data, + is_ip6 ? SFDP_LOOKUP_SP_FULL_REASS_NEXT_IP6 : + SFDP_LOOKUP_SP_FULL_REASS_NEXT_IP4, + frame->n_vectors); + + return frame->n_vectors; +} + +VLIB_NODE_FN (sfdp_lookup_ip4_sp_full_reass) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_sp_full_reass_inline (vm, node, frame, 0); +} + +VLIB_REGISTER_NODE (sfdp_lookup_ip4_sp_full_reass) = { + .name = "sfdp-lookup-ip4-sp-full-reass", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_sp_full_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_lookup_sp_full_reass_error_strings), + .error_strings = sfdp_lookup_sp_full_reass_error_strings, + .next_nodes = { +#define _(sym, str) [SFDP_LOOKUP_SP_FULL_REASS_NEXT_##sym] = str, + foreach_sfdp_lookup_sp_full_reass_next +#undef _ + }, + .n_next_nodes = SFDP_LOOKUP_SP_FULL_REASS_N_NEXT, +}; + +VLIB_NODE_FN (sfdp_lookup_ip6_sp_full_reass) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_sp_full_reass_inline (vm, node, frame, 1); +} + +VLIB_REGISTER_NODE (sfdp_lookup_ip6_sp_full_reass) = { + .name = "sfdp-lookup-ip6-sp-full-reass", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_sp_full_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_lookup_sp_full_reass_error_strings), + .error_strings = sfdp_lookup_sp_full_reass_error_strings, + .next_nodes = { +#define _(sym, str) [SFDP_LOOKUP_SP_FULL_REASS_NEXT_##sym] = str, + foreach_sfdp_lookup_sp_full_reass_next +#undef _ + }, + .n_next_nodes = 
SFDP_LOOKUP_SP_FULL_REASS_N_NEXT, +}; \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/icmp_error_node.c b/src/vnet/sfdp/lookup/icmp_error_node.c new file mode 100644 index 00000000000..1e0e1518307 --- /dev/null +++ b/src/vnet/sfdp/lookup/icmp_error_node.c @@ -0,0 +1,398 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lookup_inlines.h" + +#define foreach_sfdp_lookup_icmp_error \ + _ (NO_SESSION, "no session") \ + _ (INVALID_INNER_PKT, "invalid inner packet") + +typedef enum +{ +#define _(sym, str) SFDP_LOOKUP_ICMP_ERROR_##sym, + foreach_sfdp_lookup_icmp_error +#undef _ + SFDP_LOOKUP_ICMP_ERROR_N_ERROR, +} sfdp_icmp_lookup_error_t; + +static char *sfdp_lookup_icmp_error_strings[] = { +#define _(sym, string) string, + foreach_sfdp_lookup_icmp_error +#undef _ +}; + +#define foreach_sfdp_lookup_icmp_next _ (DROP, "error-drop") + +typedef enum +{ +#define _(a, b) SFDP_LOOKUP_ICMP_NEXT_##a, + foreach_sfdp_lookup_icmp_next +#undef _ + SFDP_LOOKUP_ICMP_N_NEXT +} sfdp_lookup_icmp_next_t; + +typedef struct +{ + +} sfdp_lookup_icmp_trace_t; + +static u8 * +format_sfdp_lookup_icmp_trace (u8 *s, va_list *args) +{ + vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *); + vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *); + sfdp_lookup_icmp_trace_t __clib_unused *t = + va_arg (*args, sfdp_lookup_icmp_trace_t *); + s = format (s, "%v:", node->name); + return s; +} + +static_always_inline uword +sfdp_lookup_icmp_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, u8 is_ipv6) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 thread_index = vm->thread_index; + + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + u32 *bi = from; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; + vlib_buffer_t *local_bufs[VLIB_FRAME_SIZE]; + i16 current_data[VLIB_FRAME_SIZE], 
*cd = current_data; + SFDP_SESSION_IP46_KEYS_TYPE (VLIB_FRAME_SIZE) keys; + sfdp_session_ip4_key_t *k4 = keys.keys4; + sfdp_session_ip6_key_t *k6 = keys.keys6; + u64 lookup_vals[VLIB_FRAME_SIZE], *lv = lookup_vals; + u64 hashes[VLIB_FRAME_SIZE], *h = hashes; + i16 l4_hdr_off[VLIB_FRAME_SIZE], *l4o = l4_hdr_off; + u16 local_next_indices[VLIB_FRAME_SIZE], *lni = local_next_indices; + u32 local_buffer_indices[VLIB_FRAME_SIZE], *lbi = local_buffer_indices; + u32 handoff_buffer_indices[VLIB_FRAME_SIZE], *hbi = handoff_buffer_indices; + u16 handoff_thread_indices[VLIB_FRAME_SIZE], *hti = handoff_thread_indices; + bool local_has_session[VLIB_FRAME_SIZE], *lhs = local_has_session; + + vlib_get_buffers (vm, from, bufs, n_left); + + if (!is_ipv6) + { + while (n_left) + { + /* Advance the current buffer */ + cd[0] = b[0]->current_data; + b[0]->current_data = vnet_buffer (b[0])->l4_hdr_offset + + 8 /* ICMP header + unused field */; + sfdp_calc_key_v4 (b[0], b[0]->flow_id, k4, lv, h, l4o, 1); + b[0]->current_data = cd[0]; + + cd += 1; + b += 1; + k4 += 1; + lv += 1; + h += 1; + l4o += 1; + n_left -= 1; + } + } + else + { + while (n_left) + { + /* Advance the current buffer */ + cd[0] = b[0]->current_data; + b[0]->current_data = vnet_buffer (b[0])->l4_hdr_offset + + 8 /* ICMP header + unused field */; + sfdp_calc_key_v6 (b[0], b[0]->flow_id, k6, lv, h, l4o, 1); + b[0]->current_data = cd[0]; + + cd += 1; + b += 1; + k6 += 1; + lv += 1; + h += 1; + l4o += 1; + n_left -= 1; + } + } + /* Perform the lookup */ + b = bufs; + bi = from; + k4 = keys.keys4; + k6 = keys.keys6; + lv = lookup_vals; + h = hashes; + l4o = l4_hdr_off; + + n_left = frame->n_vectors; + + if (!is_ipv6) + { + while (n_left) + { + uword flow_thread_index; + u16 tenant_index; + sfdp_tenant_t *tenant; + clib_bihash_kv_24_8_t kv4; + + if (lv[0] & SFDP_LV_TO_SP) + { + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_INVALID_INNER_PKT, + 1); + lbi[0] = bi[0]; + lni[0] = 
SFDP_LOOKUP_ICMP_NEXT_DROP; + lhs[0] = false; + + lbi += 1; + lni += 1; + lhs += 1; + goto next_pkt4; + } + + clib_memcpy (&kv4.key, k4, 24); + if (clib_bihash_search_inline_with_hash_24_8 (&sfdp->table4, h[0], + &kv4)) + { + /* TODO: not drop? */ + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_NO_SESSION, 1); + lbi[0] = bi[0]; + lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP; + lhs[0] = false; + + lbi += 1; + lni += 1; + lhs += 1; + goto next_pkt4; + } + else + { + lv[0] ^= kv4.value; + } + + flow_thread_index = sfdp_thread_index_from_lookup (lv[0]); + + if (thread_index != flow_thread_index) + { + hbi[0] = bi[0]; + hti[0] = flow_thread_index; + + hbi += 1; + hti += 1; + goto next_pkt4; + } + /* Flip last bit of flow index because the error goes into the + * opposite direction */ + b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]) ^ 0x1; + + tenant_index = sfdp_buffer (b[0])->tenant_index; + tenant = sfdp_tenant_at_index (sfdp, tenant_index); + + lbi[0] = bi[0]; + lni[0] = tenant->icmp4_lookup_next; + lhs[0] = true; + + lbi += 1; + lni += 1; + lhs += 1; + + next_pkt4: + + b += 1; + bi += 1; + k4 += 1; + lv += 1; + h += 1; + l4o += 1; + n_left -= 1; + } + } + else + { + while (n_left) + { + uword flow_thread_index; + u16 tenant_index; + sfdp_tenant_t *tenant; + clib_bihash_kv_48_8_t kv6; + + if (lv[0] & SFDP_LV_TO_SP) + { + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_INVALID_INNER_PKT, + 1); + lbi[0] = bi[0]; + lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP; + lhs[0] = false; + + lbi += 1; + lni += 1; + lhs += 1; + goto next_pkt6; + } + + clib_memcpy (&kv6.key, k6, 48); + if (clib_bihash_search_inline_with_hash_48_8 (&sfdp->table6, h[0], + &kv6)) + { + /* TODO: not drop? 
*/ + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_NO_SESSION, 1); + lbi[0] = bi[0]; + lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP; + lhs[0] = false; + + lbi += 1; + lni += 1; + lhs += 1; + goto next_pkt6; + } + else + { + lv[0] ^= kv6.value; + } + + flow_thread_index = sfdp_thread_index_from_lookup (lv[0]); + + if (thread_index != flow_thread_index) + { + hbi[0] = bi[0]; + hti[0] = flow_thread_index; + + hbi += 1; + hti += 1; + goto next_pkt6; + } + /* Flip last bit of flow index because the error goes into the + * opposite direction */ + b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]) ^ 0x1; + + tenant_index = sfdp_buffer (b[0])->tenant_index; + tenant = sfdp_tenant_at_index (sfdp, tenant_index); + + lbi[0] = bi[0]; + lni[0] = tenant->icmp6_lookup_next; + lhs[0] = true; + + lbi += 1; + lni += 1; + lhs += 1; + + next_pkt6: + + b += 1; + bi += 1; + k4 += 1; + lv += 1; + h += 1; + l4o += 1; + n_left -= 1; + } + } + + if (lbi - local_buffer_indices) + { + uword n = lbi - local_buffer_indices; + uword n_left_local = n; + lbi = local_buffer_indices; + lhs = local_has_session; + vlib_get_buffers (vm, lbi, local_bufs, n); + b = local_bufs; + while (n_left_local) + { + sfdp_session_t *session; + if (lhs[0]) + { + u32 session_idx = sfdp_session_from_flow_index (b[0]->flow_id); + session = sfdp_session_at_index (session_idx); + sfdp_buffer (b[0])->tenant_index = session->tenant_idx; + } + lbi += 1; + lhs += 1; + n_left_local -= 1; + b += 1; + } + vlib_buffer_enqueue_to_next (vm, node, local_buffer_indices, + local_next_indices, n); + } + + if (hbi - handoff_buffer_indices) + vlib_buffer_enqueue_to_thread ( + vm, node, + is_ipv6 ? 
sfdp->icmp6_error_frame_queue_index : + sfdp->icmp4_error_frame_queue_index, + handoff_buffer_indices, handoff_thread_indices, + hbi - handoff_buffer_indices, 1); + + if (node->flags & VLIB_NODE_FLAG_TRACE) + { + n_left = frame->n_vectors; + b = bufs; + while (n_left) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + sfdp_lookup_icmp_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + } + b += 1; + n_left -= 1; + } + } + + return frame->n_vectors; +} + +VLIB_NODE_FN (sfdp_lookup_ip4_icmp_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_icmp_inline (vm, node, frame, 0 /* is ipv6 */); +} + +VLIB_NODE_FN (sfdp_lookup_ip6_icmp_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_icmp_inline (vm, node, frame, 1 /* is ipv6 */); +} + +VLIB_REGISTER_NODE (sfdp_lookup_ip4_icmp_node) = { + .name = "sfdp-lookup-ip4-icmp", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_icmp_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_lookup_icmp_error_strings), + .error_strings = sfdp_lookup_icmp_error_strings, + .next_nodes = { +#define _(a, b) [SFDP_LOOKUP_ICMP_NEXT_##a] = (b), + foreach_sfdp_lookup_icmp_next +#undef _ + }, + .n_next_nodes = SFDP_LOOKUP_ICMP_N_NEXT +}; + +VLIB_REGISTER_NODE (sfdp_lookup_ip6_icmp_node) = { + .name = "sfdp-lookup-ip6-icmp", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_icmp_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_lookup_icmp_error_strings), + .error_strings = sfdp_lookup_icmp_error_strings, + .next_nodes = { +#define _(a, b) [SFDP_LOOKUP_ICMP_NEXT_##a] = (b), + foreach_sfdp_lookup_icmp_next +#undef _ + }, + .n_next_nodes = SFDP_LOOKUP_ICMP_N_NEXT +}; diff --git a/src/vnet/sfdp/lookup/lookup.h b/src/vnet/sfdp/lookup/lookup.h new file mode 100644 index 00000000000..4f1298b2e37 --- /dev/null +++ b/src/vnet/sfdp/lookup/lookup.h @@ -0,0 +1,30 @@ +/* 
SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_lookup_h__ +#define __included_lookup_h__ + +#define SFDP_LV_TO_SP ((u64) 0x1 << 63) +#define foreach_sfdp_lookup_error \ + _ (MISS, "flow miss") \ + _ (LOCAL, "local flow") \ + _ (REMOTE, "remote flow") \ + _ (COLLISION, "hash add collision") \ + _ (CON_DROP, "handoff drop") \ + _ (TABLE_OVERFLOW, "table overflow") + +typedef enum +{ +#define _(sym, str) SFDP_LOOKUP_ERROR_##sym, + foreach_sfdp_lookup_error +#undef _ + SFDP_LOOKUP_N_ERROR, +} sfdp_lookup_error_t; +__clib_unused static char *sfdp_lookup_error_strings[] = { +#define _(sym, string) string, + foreach_sfdp_lookup_error +#undef _ +}; + +#endif \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/lookup_common.h b/src/vnet/sfdp/lookup/lookup_common.h new file mode 100644 index 00000000000..29e5be32aad --- /dev/null +++ b/src/vnet/sfdp/lookup/lookup_common.h @@ -0,0 +1,171 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_lookup_common_h__ +#define __included_lookup_common_h__ +#include +#include +#ifdef __SSE4_1__ +#define u32x4_insert(v, x, i) (u32x4) _mm_insert_epi32 ((__m128i) (v), x, i) +#else +static_always_inline u32x4 +u32x4_insert (u32x4 v, u32 x, int i) +{ + u32x4 tmp = v; + tmp[i] = x; + return tmp; +} +#endif + +#ifdef __SSSE3__ +#define u8x8_shuffle(v, i) (u8x8) _mm_shuffle_pi8 ((__m64) (v), (__m64) i) +#else +/* pshufb-like: lane n = v[i[n] & 7], or 0 when bit 7 of i[n] is set */ +static_always_inline u8x8 +u8x8_shuffle (u8x8 v, u8x8 i) +{ +#if defined(__clang__) + u8x8 tmp = { 0 }; + tmp[0] = v[i[0] & 0x7]; + tmp[1] = v[i[1] & 0x7]; + tmp[2] = v[i[2] & 0x7]; + tmp[3] = v[i[3] & 0x7]; + tmp[4] = v[i[4] & 0x7]; + tmp[5] = v[i[5] & 0x7]; + tmp[6] = v[i[6] & 0x7]; + tmp[7] = v[i[7] & 0x7]; +#else + u8x8 tmp = __builtin_shuffle (v, i); +#endif + /* i becomes 0xff in lanes to keep and 0x00 in lanes to clear */ + i >>= 7; + i -= 1; + tmp &= i; + return tmp; +} +#endif + +#ifndef CLIB_HAVE_VEC256 +#define u32x8_splat(i) ((u32) (i) & (u32x8){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }) +#endif + +#ifndef SHUFFLE +#if defined(__clang__) +#define SHUFFLE(v1, v2, i) __builtin_shufflevector ((v1), (v2), (i)) +#elif defined(__GNUC__) +#define SHUFFLE(v1, v2, i) __builtin_shuffle ((v1), (v2), (i)) +#endif +#endif + +#define u8x16_SHUFFLE(v1, v2, i) \ + (u8x16) SHUFFLE ((u8x16) (v1), (u8x16) (v2), (u8x16) (i)) +#define u32x8_SHUFFLE(v1, v2, i) \ + (u32x8) SHUFFLE ((u32x8) (v1), (u32x8) (v2), (u32x8) (i)) + +#ifdef __SSSE3__ +#define u8x16_shuffle_dynamic(v, i) \ + (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) i) +#elif defined(__clang__) +static_always_inline u8x16 +u8x16_shuffle_dynamic (u8x16 v, u8x16 i) +{ + u8x16 tmp = { 0 }; + u16x16 tmp2; + tmp[0] = v[i[0] & 0xf]; + tmp[1] = v[i[1] & 0xf]; + tmp[2] = v[i[2] & 0xf]; + tmp[3] = v[i[3] & 0xf]; + tmp[4] = v[i[4] & 0xf]; + tmp[5] = v[i[5] & 0xf]; + tmp[6] = v[i[6]
& 0xf]; + tmp[7] = v[i[7] & 0xf]; + tmp[8] = v[i[8] & 0xf]; + tmp[9] = v[i[9] & 0xf]; + tmp[10] = v[i[10] & 0xf]; + tmp[11] = v[i[11] & 0xf]; + tmp[12] = v[i[12] & 0xf]; + tmp[13] = v[i[13] & 0xf]; + tmp[14] = v[i[14] & 0xf]; + tmp[15] = v[i[15] & 0xf]; + tmp2 = __builtin_convertvector (i, u16x16); + tmp2 &= (u16x16){ 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128 }; + tmp2 <<= 1; + tmp2 -= tmp2 >> 8; + tmp2 = ~tmp2; + tmp &= __builtin_convertvector (tmp2, u8x16); + return tmp; +} +#else +static_always_inline u8x16 +u8x16_shuffle_dynamic (u8x16 v, u8x16 i) +{ + u8x16 tmp = { 0 }; + tmp = __builtin_shuffle (v, i); + i >>= 7; + i -= 1; + tmp &= i; + return tmp; +} +#endif + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpsabi" +#ifdef __AVX2__ +#define u32x8_shuffle_dynamic(v, i) \ + (u32x8) _mm256_permutevar8x32_epi32 ((__m256i) v, (__m256i) i) +#elif defined(__clang__) +static_always_inline u32x8 +u32x8_shuffle_dynamic (u32x8 v, u32x8 i) +{ + u32x8 tmp = { 0 }; + tmp[0] = v[i[0] & 0x7]; + tmp[1] = v[i[1] & 0x7]; + tmp[2] = v[i[2] & 0x7]; + tmp[3] = v[i[3] & 0x7]; + tmp[4] = v[i[4] & 0x7]; + tmp[5] = v[i[5] & 0x7]; + tmp[6] = v[i[6] & 0x7]; + tmp[7] = v[i[7] & 0x7]; + return tmp; +} +#else +#define u32x8_shuffle_dynamic(v, i) __builtin_shuffle ((u32x8) v, (u32x8) i) +#endif + +static_always_inline u32x2 +u32x2_insert (u32x2 x, u32 y, uword idx) +{ + u32x2 tmp = x; + tmp[idx] = y; + return tmp; +} + +static_always_inline u8x8 +u8x8_insert (u8x8 x, u8 y, uword idx) +{ + u8x8 tmp = x; + tmp[idx] = y; + return tmp; +} +#pragma GCC diagnostic pop +__clib_unused static const u8 l4_mask_bits[256] = { + [IP_PROTOCOL_ICMP] = 16, [IP_PROTOCOL_IGMP] = 8, + [IP_PROTOCOL_ICMP6] = 16, [IP_PROTOCOL_TCP] = 32, + [IP_PROTOCOL_UDP] = 32, [IP_PROTOCOL_IPSEC_ESP] = 32, + [IP_PROTOCOL_IPSEC_AH] = 32, +}; + +/* L4 data offset to copy into session */ +__clib_unused static const u8 l4_offset_32w[256] = { + [IP_PROTOCOL_ICMP] = 1, 
[IP_PROTOCOL_ICMP6] = 1 +}; + +/* TODO: add ICMP, ESP, and AH (+ additional + * branching or lookup for different + * shuffling mask) */ +__clib_unused static const u64 tcp_udp_bitmask = + ((1 << IP_PROTOCOL_TCP) | (1 << IP_PROTOCOL_UDP)); + +#endif /* __included_lookup_common_h__ */ \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/lookup_inlines.h b/src/vnet/sfdp/lookup/lookup_inlines.h new file mode 100644 index 00000000000..6b136f5522d --- /dev/null +++ b/src/vnet/sfdp/lookup/lookup_inlines.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_lookup_inlines_h__ +#define __included_lookup_inlines_h__ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#endif /* __included_lookup_inlines_h__ */ diff --git a/src/vnet/sfdp/lookup/lookup_ip4.h b/src/vnet/sfdp/lookup/lookup_ip4.h new file mode 100644 index 00000000000..2aa4011ae87 --- /dev/null +++ b/src/vnet/sfdp/lookup/lookup_ip4.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_sfdp_lookup_ip4_h__ +#define __included_sfdp_lookup_ip4_h__ +#include +#include +#include +#include +static const u64 icmp4_type_ping_bitmask = + (1ULL << ICMP4_echo_request) | (1ULL << ICMP4_echo_reply); + +static const u64 icmp4_type_errors_bitmask = + (1ULL << ICMP4_destination_unreachable) | (1ULL << ICMP4_redirect) | + (1ULL << ICMP4_time_exceeded); + +#define IP4_REASS_NEEDED_FLAGS \ + ((u16) IP4_HEADER_FLAG_MORE_FRAGMENTS | (u16) ((1 << 13) - 1)) + +#define KEY_IP4_SHUFF_NO_NORM \ + 0, 1, 2, 3, -1, 5, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15 + +#define KEY_IP4_SHUFF_NORM \ + 2, 3, 0, 1, -1, 5, -1, -1, 12, 13, 14, 15, 8, 9, 10, 11 + +#define SRC_IP4_BYTESWAP_X2 \ + 11, 10, 9, 8, 16, 16, 16, 16, 11, 10, 9, 8, 16, 16, 16, 16 +#define DST_IP4_BYTESWAP_X2 \ + 15, 14, 13, 12, 16, 16, 16, 16, 15, 14, 13, 12, 16, 16, 16, 16 + +#define KEY_IP4_SWAP_ICMP \ + 2, 3, 0, 1, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 + +static const u8x16 key_ip4_shuff_no_norm = { KEY_IP4_SHUFF_NO_NORM }; + +static const u8x16 key_ip4_shuff_norm = { KEY_IP4_SHUFF_NORM }; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpsabi" +static_always_inline u8 +sfdp_calc_key_v4 (vlib_buffer_t *b, u32 context_id, + sfdp_session_ip4_key_t *skey, u64 *lookup_val, u64 *h, + i16 *l4_hdr_offset, u8 slow_path) +{ + u8 pr; + i64x2 norm, zero = {}; + u8x16 k, swap; + u32 l4_hdr; + void *next_header; + ip4_header_t *ip = vlib_buffer_get_current (b); + u8 slowpath_needed; + u8 reass_needed; + u8 l4_from_sv_reass = 0; + u8 from_full_reass; + u8 tcp_or_udp; + u8 unknown_protocol; + /* load last 16 bytes of ip header into 128-bit register */ + k = *(u8x16u *) ((u8 *) ip + 4); + pr = ip->protocol; + next_header = ip4_next_header (ip); + l4_hdr_offset[0] = (u8 *) next_header - b->data; + + reass_needed = !!(ip->flags_and_fragment_offset & + clib_host_to_net_u16 (IP4_REASS_NEEDED_FLAGS)); + tcp_or_udp = pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP; + unknown_protocol = 
!tcp_or_udp && pr != IP_PROTOCOL_ICMP; + from_full_reass = + sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_FULL_REASSEMBLED; + slowpath_needed = !tcp_or_udp || reass_needed || from_full_reass; + + if (slow_path && reass_needed && + sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_SV_REASSEMBLED) + { + /* This packet comes back from shallow virtual reassembly */ + l4_from_sv_reass = 1; + } + else if (slow_path && reass_needed) + { + /* Reassembly is needed and has not been done yet */ + lookup_val[0] = (u64) SFDP_SP_NODE_IP4_REASS << 32 | SFDP_LV_TO_SP; + return slowpath_needed; + } + + /* non TCP, UDP or ICMP packets are going to slowpath */ + if (slow_path && unknown_protocol) + { + lookup_val[0] = + (u64) SFDP_SP_NODE_IP4_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP; + /* + * full_reass will change the sfdp buf, need to restore it + * before returing. + */ + if (from_full_reass) + goto restore_sfdp_buf; + + return slowpath_needed; + } + + /* byteswap src and dst ip and splat into all 4 elts of u32x4, then + * compare so result will hold all ones if we need to swap src and dst + * signed vector type is used as */ + norm = (((i64x2) u8x16_shuffle2 (k, zero, SRC_IP4_BYTESWAP_X2)) > + ((i64x2) u8x16_shuffle2 (k, zero, DST_IP4_BYTESWAP_X2))); + + if (slow_path && pr == IP_PROTOCOL_ICMP) + { + u8 type; + i64 x, y; + + if (l4_from_sv_reass) + type = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; + else + { + icmp46_header_t *icmp = next_header; + type = icmp->type; + } + x = (1ULL << type) & icmp4_type_ping_bitmask; + y = (1ULL << type) & icmp4_type_errors_bitmask; + if (x == 0) + { + /* If it's an known ICMP error, treat in the specific slowpath (with + a lookup on inner packet), otherwise, it's an unknown protocol */ + lookup_val[0] = + y ? (u64) SFDP_SP_NODE_IP4_ICMP4_ERROR << 32 | SFDP_LV_TO_SP : + (u64) SFDP_SP_NODE_IP4_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP; + /* + * full_reass will change the sfdp buf, need to restore it + * before returing. 
+ */ + if (from_full_reass) + goto restore_sfdp_buf; + + return slowpath_needed; + } + norm &= i64x2_splat (x) != zero; + } + else + { + norm &= i64x2_splat ((1ULL << pr) & tcp_udp_bitmask) != zero; + } + swap = key_ip4_shuff_no_norm; + /* if norm is zero, we don't need to normalize so nothing happens here */ + swap += (key_ip4_shuff_norm - key_ip4_shuff_no_norm) & (u8x16) norm; + + /* overwrite first 4 bytes with first 0 - 4 bytes of l4 header */ + if (slow_path && l4_from_sv_reass) + { + u16 src_port, dst_port; + src_port = vnet_buffer (b)->ip.reass.l4_src_port; + dst_port = vnet_buffer (b)->ip.reass.l4_dst_port; + l4_hdr = dst_port << 16 | src_port; + /* Mask seqnum field out for ICMP */ + if (pr == IP_PROTOCOL_ICMP) + l4_hdr &= 0xff; + } + else if (slow_path) + l4_hdr = ((u32 *) next_header + l4_offset_32w[pr])[0] & + pow2_mask (l4_mask_bits[pr]); + else + l4_hdr = *(u32 *) next_header & pow2_mask (l4_mask_bits[pr]); + k = (u8x16) u32x4_insert ((u32x4) k, l4_hdr, 0); + + k = u8x16_shuffle_dynamic (k, swap); + + /* Reshuffle for ICMP + TODO: merge with fast path? */ + if (slow_path && pr == IP_PROTOCOL_ICMP) + k += u8x16_shuffle2 (k, zero, KEY_IP4_SWAP_ICMP); + lookup_val[0] = ((u32x4) norm)[0] & 0x1; + + /* extract tcp flags */ + if (slow_path && l4_from_sv_reass && pr == IP_PROTOCOL_TCP) + sfdp_buffer2 (b)->tcp_flags = + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; + else if (pr == IP_PROTOCOL_TCP) + sfdp_buffer (b)->tcp_flags = *(u8 *) next_header + 13; + else + sfdp_buffer (b)->tcp_flags = 0; + + /* store key */ + skey->ip4_key.as_u8x16 = k; + skey->context_id = context_id; + clib_memset (skey->zeros, 0, sizeof (skey->zeros)); + /* calculate hash */ + h[0] = clib_bihash_hash_24_8 ((clib_bihash_kv_24_8_t *) (skey)); + + if (slow_path && (l4_from_sv_reass || from_full_reass)) + { + restore_sfdp_buf: + /* Restore sfdp_buffer */ + /* TODO: optimise save/restore ? 
*/ + sfdp_buffer (b)->flags = sfdp_buffer2 (b)->flags; + sfdp_buffer (b)->service_bitmap = sfdp_buffer2 (b)->service_bitmap; + sfdp_buffer (b)->tcp_flags = sfdp_buffer2 (b)->tcp_flags; + sfdp_buffer (b)->ip6_final_proto = sfdp_buffer2 (b)->ip6_final_proto; + sfdp_buffer (b)->tenant_index = sfdp_buffer2 (b)->tenant_index; + sfdp_buffer (b)->session_version_before_handoff = + sfdp_buffer2 (b)->session_version_before_handoff; + + /*Clear*/ + sfdp_buffer2 (b)->flags = 0; + sfdp_buffer2 (b)->service_bitmap = 0; + sfdp_buffer2 (b)->tcp_flags = 0; + sfdp_buffer2 (b)->ip6_final_proto = 0; + sfdp_buffer2 (b)->tenant_index = 0; + sfdp_buffer2 (b)->session_version_before_handoff = 0; + } + + /* If slowpath needed == 1, we may have done a lot of useless work that will + be overwritten, but we avoid too much branching in fastpath */ + return slowpath_needed; +} +#pragma GCC diagnostic pop +#endif \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/lookup_ip6.h b/src/vnet/sfdp/lookup/lookup_ip6.h new file mode 100644 index 00000000000..87aedefb9ab --- /dev/null +++ b/src/vnet/sfdp/lookup/lookup_ip6.h @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_sfdp_lookup_ip6_h__ +#define __included_sfdp_lookup_ip6_h__ +#include +#include +#include +#include + +/*ICMP echo and reply are types 128 & 129 */ +static const u64 icmp6_type_ping_bitmask_128off = + (1ULL << (ICMP6_echo_request - 128)) | (1ULL << (ICMP6_echo_reply - 128)); + +static const u64 icmp6_type_errors_bitmask = + (1ULL << ICMP6_destination_unreachable) | (1ULL << ICMP6_time_exceeded); + +static const u64 icmp6_type_errors_bitmask_128off = + (1ULL << (ICMP6_redirect - 128)); + +#define KEY_IP6_SHUFF_NO_NORM_A 0, 1, 2, 3, -1, -1, 6, -1 +#define KEY_IP6_SHUFF_NORM_A 2, 3, 0, 1, -1, -1, 6, -1 +#define KEY_IP6_SHUFF_NO_NORM_B 0, 1, 2, 3, 4, 5, 6, 7 +#define KEY_IP6_SHUFF_NORM_B 4, 5, 6, 7, 0, 1, 2, 3 +#define IP6_BYTESWAP 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +#define KEY_IP6_SWAP_ICMP 2, 3, 0, 1, -1, -1, -1, -1 + +static const u8x8 key_ip6_shuff_no_norm_A = { KEY_IP6_SHUFF_NO_NORM_A }; +static const u8x8 key_ip6_shuff_norm_A = { KEY_IP6_SHUFF_NORM_A }; +static const u32x8 key_ip6_shuff_no_norm_B = { KEY_IP6_SHUFF_NO_NORM_B }; +static const u32x8 key_ip6_shuff_norm_B = { KEY_IP6_SHUFF_NORM_B }; +static const u8x8 key_ip6_swap_icmp = { KEY_IP6_SWAP_ICMP }; + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpsabi" +static_always_inline u8 +sfdp_calc_key_v6 (vlib_buffer_t *b, u32 context_id, + sfdp_session_ip6_key_t *skey, u64 *lookup_val, u64 *h, + i16 *l4_hdr_offset, u8 slow_path) +{ + u8 pr; + i64x2 norm, norm_reverse, zero = {}; + union + { + struct + { + u32x2u as_u32x2; + u32x8u as_u32x8; + }; + struct + { + u8x8u as_u8x8; + u8x16u as_u8x16[2]; + }; + struct + { + u64 as_u64; + u64x4u as_u64x4; + }; + } k; + u8x8 swap_A; + u32x8 swap_B; + STATIC_ASSERT_SIZEOF (k, 40); + u8x16 src_ip6, dst_ip6; + u32 l4_hdr; + void *next_header; + u8 *data = vlib_buffer_get_current (b); + ip6_header_t *ip = (void *) data; + int slowpath_needed; + u8 ext_hdr = 0; + u8 l4_from_sv_reass = 0; + u8 from_full_reass; + u8 
tcp_or_udp; + u8 unknown_protocol; + + /* loads 40 bytes of ip6 header */ + k.as_u32x2 = *(u32x2u *) data; + k.as_u32x8 = *(u32x8u *) (data + 8); + + if (slow_path && PREDICT_FALSE (sfdp_buffer (b)->flags & + SFDP_BUFFER_FLAG_IP6_FINAL_PROTO_VALID)) + { + pr = sfdp_buffer (b)->ip6_final_proto; + ext_hdr = 0; + next_header = b->data + vnet_buffer (b)->l4_hdr_offset; + k.as_u8x8 = u8x8_insert (k.as_u8x8, pr, 6); /* use final proto in key */ + } + else + { + pr = ip->protocol; + ext_hdr = ip6_ext_hdr (pr); + next_header = ip6_next_header (ip); + } + + tcp_or_udp = pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP; + from_full_reass = + sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_FULL_REASSEMBLED; + slowpath_needed = !tcp_or_udp || from_full_reass; + + /* byteswap src and dst ip and splat into all 4 elts of u32x4, then + * compare so result will hold all ones if we need to swap src and dst + * signed vector type is used as */ + src_ip6 = u8x16_shuffle2 (k.as_u8x16[0], zero, IP6_BYTESWAP); + dst_ip6 = u8x16_shuffle2 (k.as_u8x16[1], zero, IP6_BYTESWAP); + norm = (u64x2) src_ip6 > (u64x2) dst_ip6; + norm_reverse = (u64x2) src_ip6 < (u64x2) dst_ip6; + norm = i64x2_splat (norm[1] | (~norm_reverse[1] & norm[0])); + + if (slow_path && sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_SV_REASSEMBLED) + { + /* This packet comes back from shallow virtual reassembly */ + l4_from_sv_reass = 1; + } + if (slow_path && ext_hdr) + { + /* Parse the extension header chain and look for fragmentation */ + ip6_ext_hdr_chain_t chain = { 0 }; + int res = + ip6_ext_header_walk (b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION, &chain); + if (!(l4_from_sv_reass || from_full_reass) && res >= 0 && + chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION) + { + /* Reassembly is needed and has not been done yet */ + lookup_val[0] = (u64) SFDP_SP_NODE_IP6_REASS << 32 | SFDP_LV_TO_SP; + return slowpath_needed; + } + else + { + next_header = + ip6_ext_next_header_offset (ip, chain.eh[chain.length - 1].offset); + pr = 
chain.eh[chain.length - 1].protocol; + tcp_or_udp = pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP; + k.as_u8x8 = + u8x8_insert (k.as_u8x8, pr, 6); /* use final proto in key */ + } + } + l4_hdr_offset[0] = (u8 *) next_header - b[0].data; + unknown_protocol = !tcp_or_udp && pr != IP_PROTOCOL_ICMP6; + + if (slow_path && unknown_protocol) + { + lookup_val[0] = + (u64) SFDP_SP_NODE_IP6_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP; + /* + * full_reass will change the sfdp buf, need to restore it + * before returing. + */ + if (from_full_reass) + goto restore_sfdp_buf; + + return slowpath_needed; + } + + if (slow_path && pr == IP_PROTOCOL_ICMP6) + { + u8 type; + i64 x, y, t, t128; + if (l4_from_sv_reass) + type = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; + else + { + icmp46_header_t *icmp = next_header; + type = icmp->type; + } + t = (1ULL << type); + t128 = (1ULL << ((u8) (type - 128))); + x = t128 & icmp6_type_ping_bitmask_128off; + y = t & icmp6_type_errors_bitmask; + y |= t128 & icmp6_type_errors_bitmask_128off; + if (x == 0) + { + /* If it's an known ICMP error, treat in the specific slowpath (with + a lookup on inner packet), otherwise, it's an unknown protocol */ + lookup_val[0] = + y ? (u64) SFDP_SP_NODE_IP6_ICMP6_ERROR << 32 | SFDP_LV_TO_SP : + (u64) SFDP_SP_NODE_IP6_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP; + /* + * full_reass will change the sfdp buf, need to restore it + * before returing. 
+ */ + if (from_full_reass) + goto restore_sfdp_buf; + + return slowpath_needed; + } + norm &= i64x2_splat (x) != zero; + } + else + { + norm &= i64x2_splat ((1ULL << pr) & tcp_udp_bitmask) != zero; + } + swap_A = key_ip6_shuff_no_norm_A; + swap_B = key_ip6_shuff_no_norm_B; + + /* if norm is zero, we don't need to normalize so nothing happens here */ + swap_A += (key_ip6_shuff_norm_A - key_ip6_shuff_no_norm_A) & (u8x8) norm[0]; + swap_B += + (key_ip6_shuff_norm_B - key_ip6_shuff_no_norm_B) & u32x8_splat (norm[0]); + + /* overwrite first 4 bytes with first 0 - 4 bytes of l4 header */ + if (slow_path && l4_from_sv_reass) + { + u16 src_port, dst_port; + src_port = vnet_buffer (b)->ip.reass.l4_src_port; + dst_port = vnet_buffer (b)->ip.reass.l4_dst_port; + l4_hdr = dst_port << 16 | src_port; + /* Mask seqnum field out for ICMP */ + if (pr == IP_PROTOCOL_ICMP6) + l4_hdr &= 0xff; + } + else if (slow_path) + l4_hdr = ((u32 *) next_header + l4_offset_32w[pr])[0] & + pow2_mask (l4_mask_bits[pr]); + else + l4_hdr = *(u32 *) next_header & pow2_mask (l4_mask_bits[pr]); + + k.as_u32x2 = u32x2_insert (k.as_u32x2, l4_hdr, 0); + + k.as_u8x8 = u8x8_shuffle (k.as_u8x8, swap_A); + k.as_u32x8 = u32x8_shuffle_dynamic (k.as_u32x8, swap_B); + /* Reshuffle for ICMP + TODO: merge with fast path? 
*/ + if (slow_path && pr == IP_PROTOCOL_ICMP6) + k.as_u8x8 += u8x8_shuffle (k.as_u8x8, key_ip6_swap_icmp); + lookup_val[0] = ((u32x4) norm)[0] & 0x1; + + /* extract tcp flags */ + if (slow_path && l4_from_sv_reass && pr == IP_PROTOCOL_TCP) + sfdp_buffer2 (b)->tcp_flags = + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; + else if (pr == IP_PROTOCOL_TCP) + sfdp_buffer (b)->tcp_flags = *(u8 *) next_header + 13; + else + sfdp_buffer (b)->tcp_flags = 0; + + /* store key */ + skey->ip6_key.as_u64 = k.as_u64; + skey->ip6_key.as_u64x4 = k.as_u64x4; + skey->context_id = context_id; + clib_memset (skey->zeros, 0, sizeof (skey->zeros)); + /* calculate hash */ + h[0] = clib_bihash_hash_48_8 ((clib_bihash_kv_48_8_t *) (skey)); + + if (slow_path && (l4_from_sv_reass || from_full_reass)) + { + restore_sfdp_buf: + /* Restore sfdp_buffer */ + /* TODO: optimise save/restore ? */ + sfdp_buffer (b)->flags = sfdp_buffer2 (b)->flags; + sfdp_buffer (b)->service_bitmap = sfdp_buffer2 (b)->service_bitmap; + sfdp_buffer (b)->tcp_flags = sfdp_buffer2 (b)->tcp_flags; + sfdp_buffer (b)->tenant_index = sfdp_buffer2 (b)->tenant_index; + sfdp_buffer (b)->session_version_before_handoff = + sfdp_buffer2 (b)->session_version_before_handoff; + + /*Clear*/ + sfdp_buffer2 (b)->flags = 0; + sfdp_buffer2 (b)->service_bitmap = 0; + sfdp_buffer2 (b)->tcp_flags = 0; + sfdp_buffer2 (b)->tenant_index = 0; + sfdp_buffer2 (b)->session_version_before_handoff = 0; + } + /* If slowpath needed == 1, we may have done a lot of useless work that will + be overwritten, but we avoid too much branching in fastpath */ + return slowpath_needed; +} +#pragma GCC diagnostic pop +#endif /* __included_sfdp_lookup_ip6_h__ */ \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/node.c b/src/vnet/sfdp/lookup/node.c new file mode 100644 index 00000000000..c6861cc6dca --- /dev/null +++ b/src/vnet/sfdp/lookup/node.c @@ -0,0 +1,938 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lookup_inlines.h" +#include "lookup.h" + +#define foreach_sfdp_handoff_error \ + _ (SESS_DROP, sess_drop, INFO, "Session expired during handoff") \ + _ (NOERROR, noerror, INFO, "no error") + +typedef enum +{ +#define _(f, n, s, d) SFDP_HANDOFF_ERROR_##f, + foreach_sfdp_handoff_error +#undef _ + SFDP_HANDOFF_N_ERROR, +} sfdp_handoff_error_t; + +static vlib_error_desc_t sfdp_handoff_error_counters[] = { +#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s }, + foreach_sfdp_handoff_error +#undef _ +}; + +typedef struct +{ + u32 sw_if_index; + union + { + sfdp_session_ip4_key_t k4; + sfdp_session_ip6_key_t k6; + }; + u8 is_ip6; + u8 is_sp; + union + { + struct + { + u32 next_index; + u64 hash; + u32 flow_id; + }; + struct + { + u32 sp_index; + u32 sp_node_index; + }; + }; +} sfdp_lookup_trace_t; + +typedef struct +{ + u32 next_index; + u32 flow_id; +} sfdp_handoff_trace_t; + +static_always_inline int +sfdp_create_session_v4 (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd, + sfdp_tenant_t *tenant, u16 tenant_idx, + u32 thread_index, f64 time_now, void *k, u64 *h, + u64 *lookup_val, u32 scope_index) +{ + return sfdp_create_session_inline (sfdp, ptd, tenant, tenant_idx, + thread_index, time_now, k, h, lookup_val, + scope_index, 0); +} + +static_always_inline int +sfdp_create_session_v6 (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd, + sfdp_tenant_t *tenant, u16 tenant_idx, + u32 thread_index, f64 time_now, void *k, u64 *h, + u64 *lookup_val, u32 scope_index) +{ + return sfdp_create_session_inline (sfdp, ptd, tenant, tenant_idx, + thread_index, time_now, k, h, lookup_val, + scope_index, 1); +} + +static_always_inline u8 +sfdp_lookup_four_v4 (vlib_buffer_t **b, sfdp_session_ip4_key_t *k, + u64 *lookup_val, u64 *h, i16 *l4_hdr_offset, + int prefetch_buffer_stride, u8 slowpath) +{ + vlib_buffer_t **pb = b + prefetch_buffer_stride; + u8 slowpath_needed = 0; 
+ if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[0]); + clib_prefetch_load (pb[0]->data); + } + + slowpath_needed |= + sfdp_calc_key_v4 (b[0], b[0]->flow_id, k + 0, lookup_val + 0, h + 0, + l4_hdr_offset + 0, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[1]); + clib_prefetch_load (pb[1]->data); + } + + slowpath_needed |= + sfdp_calc_key_v4 (b[1], b[1]->flow_id, k + 1, lookup_val + 1, h + 1, + l4_hdr_offset + 1, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[2]); + clib_prefetch_load (pb[2]->data); + } + + slowpath_needed |= + sfdp_calc_key_v4 (b[2], b[2]->flow_id, k + 2, lookup_val + 2, h + 2, + l4_hdr_offset + 2, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[3]); + clib_prefetch_load (pb[3]->data); + } + + slowpath_needed |= + sfdp_calc_key_v4 (b[3], b[3]->flow_id, k + 3, lookup_val + 3, h + 3, + l4_hdr_offset + 3, slowpath); + return slowpath_needed; +} + +static_always_inline u8 +sfdp_lookup_four_v6 (vlib_buffer_t **b, sfdp_session_ip6_key_t *k, + u64 *lookup_val, u64 *h, i16 *l4_hdr_offset, + int prefetch_buffer_stride, u8 slowpath) +{ + vlib_buffer_t **pb = b + prefetch_buffer_stride; + u8 slowpath_needed = 0; + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[0]); + clib_prefetch_load (pb[0]->data); + } + + slowpath_needed |= + sfdp_calc_key_v6 (b[0], b[0]->flow_id, k + 0, lookup_val + 0, h + 0, + l4_hdr_offset + 0, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[1]); + clib_prefetch_load (pb[1]->data); + } + + slowpath_needed |= + sfdp_calc_key_v6 (b[1], b[1]->flow_id, k + 1, lookup_val + 1, h + 1, + l4_hdr_offset + 1, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[2]); + clib_prefetch_load (pb[2]->data); + } + + slowpath_needed |= + sfdp_calc_key_v6 (b[2], b[2]->flow_id, k + 2, lookup_val + 2, h + 2, + l4_hdr_offset + 2, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[3]); + 
clib_prefetch_load (pb[3]->data); + } + + slowpath_needed |= + sfdp_calc_key_v6 (b[3], b[3]->flow_id, k + 3, lookup_val + 3, h + 3, + l4_hdr_offset + 3, slowpath); + return slowpath_needed; +} + +static_always_inline void +sfdp_prepare_all_keys_v4_slow (vlib_buffer_t **b, sfdp_session_ip4_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, + u32 n_left); + +static_always_inline void +sfdp_prepare_all_keys_v6_slow (vlib_buffer_t **b, sfdp_session_ip6_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, + u32 n_left); + +static_always_inline uword +sfdp_prepare_all_keys_v4 (vlib_buffer_t **b, sfdp_session_ip4_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left, + u8 slowpath) +{ + /* main loop - prefetch next 4 buffers, + * prefetch previous 4 buckets */ + while (n_left >= 8) + { + if (sfdp_lookup_four_v4 (b, k, lv, h, l4_hdr_offset, 4, slowpath) && + !slowpath) + return n_left; + + b += 4; + k += 4; + lv += 4; + h += 4; + l4_hdr_offset += 4; + n_left -= 4; + } + + /* last 4 packets - dont prefetch next 4 buffers, + * prefetch previous 4 buckets */ + if (n_left >= 4) + { + if (sfdp_lookup_four_v4 (b, k, lv, h, l4_hdr_offset, 0, slowpath) && + !slowpath) + return n_left; + + b += 4; + k += 4; + lv += 4; + h += 4; + l4_hdr_offset += 4; + n_left -= 4; + } + + while (n_left > 0) + { + if (sfdp_calc_key_v4 (b[0], b[0]->flow_id, k + 0, lv + 0, h + 0, + l4_hdr_offset + 0, slowpath) && + !slowpath) + return n_left; + + b += 1; + k += 1; + lv += 1; + h += 1; + l4_hdr_offset += 1; + n_left -= 1; + } + return 0; +} + +static_always_inline uword +sfdp_prepare_all_keys_v6 (vlib_buffer_t **b, sfdp_session_ip6_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left, + u8 slowpath) +{ + /* main loop - prefetch next 4 buffers, + * prefetch previous 4 buckets */ + while (n_left >= 8) + { + if (sfdp_lookup_four_v6 (b, k, lv, h, l4_hdr_offset, 4, slowpath) && + !slowpath) + return n_left; + + b += 4; + k += 4; + lv += 4; + h += 4; + l4_hdr_offset += 4; + n_left -= 4; + } + + /* 
last 4 packets - dont prefetch next 4 buffers, + * prefetch previous 4 buckets */ + if (n_left >= 4) + { + if (sfdp_lookup_four_v6 (b, k, lv, h, l4_hdr_offset, 0, slowpath) && + !slowpath) + return n_left; + + b += 4; + k += 4; + lv += 4; + h += 4; + l4_hdr_offset += 4; + n_left -= 4; + } + + while (n_left > 0) + { + if (sfdp_calc_key_v6 (b[0], b[0]->flow_id, k + 0, lv + 0, h + 0, + l4_hdr_offset, slowpath) && + !slowpath) + return n_left; + + b += 1; + k += 1; + lv += 1; + h += 1; + l4_hdr_offset += 1; + n_left -= 1; + } + return 0; +} + +static_always_inline void +sfdp_prepare_all_keys_v4_slow (vlib_buffer_t **b, sfdp_session_ip4_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left) +{ + sfdp_prepare_all_keys_v4 (b, k, lv, h, l4_hdr_offset, n_left, 1); +} +static_always_inline uword +sfdp_prepare_all_keys_v4_fast (vlib_buffer_t **b, sfdp_session_ip4_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left) +{ + return sfdp_prepare_all_keys_v4 (b, k, lv, h, l4_hdr_offset, n_left, 0); +} + +static_always_inline void +sfdp_prepare_all_keys_v6_slow (vlib_buffer_t **b, sfdp_session_ip6_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left) +{ + sfdp_prepare_all_keys_v6 (b, k, lv, h, l4_hdr_offset, n_left, 1); +} + +static_always_inline uword +sfdp_prepare_all_keys_v6_fast (vlib_buffer_t **b, sfdp_session_ip6_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left) +{ + return sfdp_prepare_all_keys_v6 (b, k, lv, h, l4_hdr_offset, n_left, 0); +} + +static_always_inline uword +sfdp_lookup_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, u8 is_ipv6) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 thread_index = vm->thread_index; + sfdp_per_thread_data_t *ptd = + vec_elt_at_index (sfdp->per_thread_data, thread_index); + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + sfdp_bihash_kv46_t kv = {}; + sfdp_tenant_t *tenant; + sfdp_session_t *session; + u32 session_index; + u32 *bi, *from = vlib_frame_vector_args (frame); + u32 n_left = 
frame->n_vectors; + u32 to_local[VLIB_FRAME_SIZE], n_local = 0; + u32 to_remote[VLIB_FRAME_SIZE], n_remote = 0; + u32 to_sp[VLIB_FRAME_SIZE], n_to_sp = 0; + u16 thread_indices[VLIB_FRAME_SIZE]; + u16 local_next_indices[VLIB_FRAME_SIZE]; + u32 sp_indices[VLIB_FRAME_SIZE]; + u32 sp_node_indices[VLIB_FRAME_SIZE]; + vlib_buffer_t *local_bufs[VLIB_FRAME_SIZE]; + vlib_buffer_t *to_sp_bufs[VLIB_FRAME_SIZE]; + u32 local_flow_indices[VLIB_FRAME_SIZE]; + u32 created_session_indices[VLIB_FRAME_SIZE], n_created = 0; + SFDP_SESSION_IP46_KEYS_TYPE (VLIB_FRAME_SIZE) keys; + + sfdp_session_ip4_key_t *k4 = keys.keys4; + sfdp_session_ip6_key_t *k6 = keys.keys6; + + u64 hashes[VLIB_FRAME_SIZE], *h = hashes; + u32 lengths[VLIB_FRAME_SIZE], *len = lengths; + i16 l4_hdr_off[VLIB_FRAME_SIZE], *l4o = l4_hdr_off; + f64 time_now = vlib_time_now (vm); + /* lookup_vals contains: + * - (Phase 1) to_slow_path_node (1bit) + ||| slow_path_node_index (31bits) + * ||| zeros(31bits) + * ||| + * ||| packet_dir (1bit) + * + * - (Phase 2) session_version + thread_index + flow_index . Cf. 
sfdp.h + OR same as Phase 1 if slow path + ASSUMPTION: thread index < 2^31 */ + u64 __attribute__ ((aligned (32))) lookup_vals[VLIB_FRAME_SIZE], + *lv = lookup_vals; + __clib_unused u16 hit_count = 0; + uword n_left_slow_keys; + sfdp_lookup_node_runtime_data_t *rt = (void *) node->runtime_data; + u32 scope_index = rt->scope_index; + u32 fqi = + vec_elt_at_index (sfdp->frame_queue_index_per_scope, scope_index)[0]; + + vlib_get_buffers (vm, from, bufs, n_left); + b = bufs; + + if (is_ipv6) + { + if (PREDICT_FALSE ((n_left_slow_keys = sfdp_prepare_all_keys_v6_fast ( + b, k6, lv, h, l4o, n_left)))) + { + uword n_done = n_left - n_left_slow_keys; + sfdp_prepare_all_keys_v6_slow (b + n_done, k6 + n_done, lv + n_done, + h + n_done, l4o + n_done, + n_left_slow_keys); + } + } + else + { + if (PREDICT_FALSE ((n_left_slow_keys = sfdp_prepare_all_keys_v4_fast ( + b, k4, lv, h, l4o, n_left)))) + { + uword n_done = n_left - n_left_slow_keys; + sfdp_prepare_all_keys_v4_slow (b + n_done, k4 + n_done, lv + n_done, + h + n_done, l4o + n_done, + n_left_slow_keys); + } + } + + if (is_ipv6) + while (n_left) + { + if (PREDICT_TRUE (n_left > 8)) + clib_bihash_prefetch_bucket_48_8 (&sfdp->table6, h[8]); + + if (PREDICT_TRUE (n_left > 1)) + vlib_prefetch_buffer_header (b[1], STORE); + + if (PREDICT_FALSE (lv[0] & SFDP_LV_TO_SP)) + goto next_pkt6; + + clib_memcpy_fast (&kv.kv6.key, k6, 48); + if (clib_bihash_search_inline_with_hash_48_8 (&sfdp->table6, h[0], + &kv.kv6)) + { + u16 tenant_idx = sfdp_buffer (b[0])->tenant_index; + int rv; + tenant = sfdp_tenant_at_index (sfdp, tenant_idx); + rv = sfdp_create_session_v6 (sfdp, ptd, tenant, tenant_idx, + thread_index, time_now, k6, h, lv, + scope_index); + if (PREDICT_FALSE (rv == 1)) + { + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ERROR_TABLE_OVERFLOW, 1); + lv[0] = + (u64) SFDP_SP_NODE_IP6_TABLE_OVERFLOW << 32 | SFDP_LV_TO_SP; + goto next_pkt6; + } + else if (rv == 2) + { + vlib_node_increment_counter (vm, 
node->node_index, + SFDP_LOOKUP_ERROR_COLLISION, 1); + continue; /* if there is collision, we just reiterate */ + } + created_session_indices[n_created] = + sfdp_session_index_from_lookup (lv[0]); + n_created++; + } + else + { + lv[0] ^= kv.kv6.value; + hit_count++; + } + + b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]); + + next_pkt6: + b[0]->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; + vnet_buffer (b[0])->l4_hdr_offset = l4o[0]; + len[0] = vlib_buffer_length_in_chain (vm, b[0]); + + b += 1; + n_left -= 1; + k6 += 1; + h += 1; + lv += 1; + len += 1; + /* advance the per-packet L4 offset too, exactly like the ip4 loop */ + l4o += 1; + } + else + while (n_left) + { + if (PREDICT_TRUE (n_left > 8)) + clib_bihash_prefetch_bucket_24_8 (&sfdp->table4, h[8]); + + if (PREDICT_TRUE (n_left > 1)) + vlib_prefetch_buffer_header (b[1], STORE); + + if (PREDICT_FALSE (lv[0] & SFDP_LV_TO_SP)) + goto next_pkt4; + + clib_memcpy_fast (&kv.kv4.key, k4, 24); + if (clib_bihash_search_inline_with_hash_24_8 (&sfdp->table4, h[0], + &kv.kv4)) + { + u16 tenant_idx = sfdp_buffer (b[0])->tenant_index; + int rv; + tenant = sfdp_tenant_at_index (sfdp, tenant_idx); + rv = sfdp_create_session_v4 (sfdp, ptd, tenant, tenant_idx, + thread_index, time_now, k4, h, lv, + scope_index); + if (PREDICT_FALSE (rv == 1)) + { + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ERROR_TABLE_OVERFLOW, 1); + lv[0] = + (u64) SFDP_SP_NODE_IP4_TABLE_OVERFLOW << 32 | SFDP_LV_TO_SP; + goto next_pkt4; + } + else if (rv == 2) + { + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_COLLISION, 1); + continue; /* if there is collision, we just reiterate */ + } + created_session_indices[n_created] = + sfdp_session_index_from_lookup (lv[0]); + n_created++; + } + else + { + lv[0] ^= kv.kv4.value; + hit_count++; + } + + b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]); + + next_pkt4: + b[0]->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; + vnet_buffer (b[0])->l4_hdr_offset = l4o[0]; + len[0] = vlib_buffer_length_in_chain (vm, b[0]); + + b += 1; + n_left
-= 1; + k4 += 1; + h += 1; + lv += 1; + len += 1; + l4o += 1; + } + + // Notify created sessions + if (n_created) + { + sfdp_notify_new_sessions (sfdp, created_session_indices, n_created); + } + + n_left = frame->n_vectors; + lv = lookup_vals; + b = bufs; + bi = from; + len = lengths; + while (n_left) + { + u16 flow_thread_index; + u32 flow_index; + session_version_t session_version; + vlib_combined_counter_main_t *vcm; + + if (lv[0] & SFDP_LV_TO_SP) + { + to_sp[n_to_sp] = bi[0]; + sp_indices[n_to_sp] = (lv[0] & ~(SFDP_LV_TO_SP)) >> 32; + to_sp_bufs[n_to_sp] = b[0]; + n_to_sp++; + goto next_packet2; + } + + flow_thread_index = sfdp_thread_index_from_lookup (lv[0]); + flow_index = sfdp_pseudo_flow_index_from_lookup (lv[0]); + session_index = flow_index >> 1; + vcm = &sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP]; + session_version = sfdp_session_version_from_lookup (lv[0]); + vlib_increment_combined_counter (vcm, thread_index, flow_index, 1, + len[0]); + if (PREDICT_FALSE (flow_thread_index == SFDP_UNBOUND_THREAD_INDEX)) + { + flow_thread_index = thread_index; + sfdp_session_bind_to_thread (session_index, &flow_thread_index, 1); + /* flow_thread_index now necessarily contains the actual thread index + * of the session */ + } + if (flow_thread_index == thread_index) + { + /* known flow which belongs to this thread */ + to_local[n_local] = bi[0]; + local_flow_indices[n_local] = flow_index; + local_bufs[n_local] = b[0]; + n_local++; + } + else + { + /* known flow which belongs to remote thread */ + to_remote[n_remote] = bi[0]; + thread_indices[n_remote] = flow_thread_index; + /* Store the current session version in buffer to check if it's still + * valid after handoff */ + sfdp_buffer (b[0])->session_version_before_handoff = session_version; + n_remote++; + } + next_packet2: + n_left -= 1; + lv += 1; + b += 1; + bi += 1; + len += 1; + } + + /* handover buffers to remote node */ + if (n_remote) + { + u32 n_remote_enq; + n_remote_enq = vlib_buffer_enqueue_to_thread ( + 
vm, node, fqi, to_remote, thread_indices, n_remote, 1); + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_REMOTE, n_remote_enq); + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_CON_DROP, + n_remote - n_remote_enq); + } + + /* enqueue local */ + if (n_local) + { + u16 *current_next = local_next_indices; + u32 *local_flow_index = local_flow_indices; + uword session_scope_index; + b = local_bufs; + n_left = n_local; + + /* TODO: prefetch session and buffer + 4 loop */ + while (n_left) + { + session_index = local_flow_index[0] >> 1; + session = sfdp_session_at_index (session_index); + session_scope_index = session->scope_index; + if (PREDICT_TRUE (session_scope_index == scope_index)) + { + sfdp_bitmap_t pbmp = + session->bitmaps[sfdp_direction_from_flow_index ( + local_flow_index[0])]; + sfdp_buffer (b[0])->service_bitmap = pbmp; + + /* The tenant of the buffer is the tenant of the session */ + sfdp_buffer (b[0])->tenant_index = session->tenant_idx; + + sfdp_next (b[0], current_next); + } + else + current_next[0] = + SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (session_scope_index); + + local_flow_index += 1; + current_next += 1; + b += 1; + n_left -= 1; + } + vlib_buffer_enqueue_to_next (vm, node, to_local, local_next_indices, + n_local); + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_LOCAL, n_local); + } + + if (n_to_sp) + { + vlib_frame_t *f = NULL; + u32 *current_next_slot = NULL; + u32 current_left_to_next = 0; + u32 *current_to_sp = to_sp; + u32 *sp_index = sp_indices; + u32 *sp_node_index = sp_node_indices; + u32 last_node_index = VLIB_INVALID_NODE_INDEX; + + b = to_sp_bufs; + n_left = n_to_sp; + + while (n_left) + { + u32 node_index; + u16 tenant_idx; + sfdp_tenant_t *tenant; + + tenant_idx = sfdp_buffer (b[0])->tenant_index; + tenant = sfdp_tenant_at_index (sfdp, tenant_idx); + node_index = tenant->sp_node_indices[sp_index[0]]; + sp_node_index[0] = node_index; + + if (PREDICT_FALSE (node_index != 
last_node_index) || + current_left_to_next == 0) + { + if (f != NULL) + vlib_put_frame_to_node (vm, last_node_index, f); + f = vlib_get_frame_to_node (vm, node_index); + f->frame_flags |= node->flags & VLIB_NODE_FLAG_TRACE; + current_next_slot = vlib_frame_vector_args (f); + current_left_to_next = VLIB_FRAME_SIZE; + last_node_index = node_index; + } + + current_next_slot[0] = current_to_sp[0]; + + f->n_vectors += 1; + current_to_sp += 1; + b += 1; + sp_index += 1; + sp_node_index += 1; + current_next_slot += 1; + + current_left_to_next -= 1; + n_left -= 1; + } + vlib_put_frame_to_node (vm, last_node_index, f); + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + int i; + b = bufs; + bi = from; + h = hashes; + u32 *in_local = to_local; + u32 *in_remote = to_remote; + u32 *in_sp = to_sp; + for (i = 0; i < frame->n_vectors; i++) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + sfdp_lookup_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + t->flow_id = b[0]->flow_id; + t->hash = h[0]; + t->is_sp = 0; + if (bi[0] == in_local[0]) + { + t->next_index = local_next_indices[(in_local++) - to_local]; + } + else if (bi[0] == in_remote[0]) + { + t->next_index = ~0; + in_remote++; + } + else + { + t->is_sp = 1; + t->sp_index = sp_indices[in_sp - to_sp]; + t->sp_node_index = sp_node_indices[in_sp - to_sp]; + in_sp++; + } + + if ((t->is_ip6 = is_ipv6)) + clib_memcpy (&t->k6, &keys.keys6[i], sizeof (t->k6)); + else + clib_memcpy (&t->k4, &keys.keys4[i], sizeof (t->k4)); + + bi++; + b++; + h++; + } + else + break; + } + } + return frame->n_vectors; +} + +VLIB_NODE_FN (sfdp_lookup_ip4_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_inline (vm, node, frame, 0); +} + +VLIB_NODE_FN (sfdp_lookup_ip6_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_inline (vm, node, frame, 1); +} + +VLIB_NODE_FN 
(sfdp_handoff_node) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 *from = vlib_frame_vector_args (frame), *bi = from; + u32 n_left = frame->n_vectors; + u16 next_indices[VLIB_FRAME_SIZE], *current_next; + u32 next_buffers[VLIB_FRAME_SIZE], *next_buffer = next_buffers; + u32 drop_buffers[VLIB_FRAME_SIZE], *drop_buffer = drop_buffers; + size_t n_next = 0, n_drop = 0; + sfdp_lookup_node_runtime_data_t *rt = (void *) node->runtime_data; + u32 scope_index = rt->scope_index; + + vlib_get_buffers (vm, from, bufs, n_left); + b = bufs; + current_next = next_indices; + + /*TODO: prefetch, quad or octo loop...*/ + while (n_left) + { + u32 flow_index = b[0]->flow_id; + u32 session_index = flow_index >> 1; + + // Get session if valid and if session_version didn't change + sfdp_session_t *session = sfdp_session_at_index_if_valid (session_index); + if (PREDICT_TRUE (session && + session->session_version == + sfdp_buffer (b[0])->session_version_before_handoff)) + { + u32 session_scope_index = session->scope_index; + if (PREDICT_TRUE (scope_index == session_scope_index)) + { + sfdp_bitmap_t pbmp = + session->bitmaps[sfdp_direction_from_flow_index (flow_index)]; + sfdp_buffer (b[0])->service_bitmap = pbmp; + sfdp_next (b[0], current_next); + } + else + current_next[0] = + SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (session_scope_index); + + *next_buffer = *bi; + current_next += 1; + next_buffer += 1; + n_next += 1; + } + else + { + // drop if session doesn't exist anymore + *drop_buffer = *bi; + drop_buffer += 1; + n_drop++; + } + + b += 1; + bi += 1; + n_left -= 1; + } + vlib_buffer_enqueue_to_next (vm, node, next_buffers, next_indices, n_next); + vlib_buffer_free (vm, drop_buffers, n_drop); + vlib_node_increment_counter (vm, node->node_index, + SFDP_HANDOFF_ERROR_SESS_DROP, n_drop); + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + int i; + b = bufs; + current_next = next_indices; + for (i = 0; i < 
frame->n_vectors; i++) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + sfdp_handoff_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->flow_id = b[0]->flow_id; + t->next_index = current_next[0]; + b++; + current_next++; + } + else + break; + } + } + return frame->n_vectors; +} + +/* Trace formatter for the sfdp-lookup nodes: prints either the regular + * lookup result (next index, hash, flow-id) or the slow-path dispatch + * (slow-path id and node name), followed by the raw session key bytes. + * The hash is a u64, hence the %llx conversion. */ +static u8 * +format_sfdp_lookup_trace (u8 *s, va_list *args) +{ + vlib_main_t *vm = va_arg (*args, vlib_main_t *); + vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *); + sfdp_lookup_trace_t *t = va_arg (*args, sfdp_lookup_trace_t *); + + if (!t->is_sp) + s = format (s, + "sfdp-lookup: sw_if_index %d, next index %d hash 0x%llx " + "flow-id %u (session %u, %s) key 0x%U", + t->sw_if_index, t->next_index, t->hash, t->flow_id, + t->flow_id >> 1, t->flow_id & 0x1 ? "reverse" : "forward", + format_hex_bytes_no_wrap, + t->is_ip6 ? (u8 *) &t->k6 : (u8 *) &t->k4, + t->is_ip6 ? sizeof (t->k6) : sizeof (t->k4)); + else + s = format (s, + "sfdp-lookup: sw_if_index %d, slow-path (%U) " + "slow-path node %U key 0x%U", + t->sw_if_index, format_sfdp_sp_node, t->sp_index, + format_vlib_node_name, vm, t->sp_node_index, + format_hex_bytes_no_wrap, + t->is_ip6 ? (u8 *) &t->k6 : (u8 *) &t->k4, + t->is_ip6 ? sizeof (t->k6) : sizeof (t->k4)); + return s; +} + +static u8 * +format_sfdp_handoff_trace (u8 *s, va_list *args) +{ + vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *); + vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *); + sfdp_handoff_trace_t *t = va_arg (*args, sfdp_handoff_trace_t *); + + s = format (s, + "sfdp-handoff: next index %d " + "flow-id %u (session %u, %s)", + t->next_index, t->flow_id, t->flow_id >> 1, + t->flow_id & 0x1 ?
"reverse" : "forward"); + return s; +} + +static sfdp_lookup_node_runtime_data_t lookup_rt_data_default = { + .scope_index = 0 +}; + +VLIB_REGISTER_NODE (sfdp_lookup_ip4_node) = { + .name = "sfdp-lookup-ip4", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES, + .runtime_data = &lookup_rt_data_default, + .runtime_data_bytes = sizeof (lookup_rt_data_default), + .n_errors = ARRAY_LEN (sfdp_lookup_error_strings), + .error_strings = sfdp_lookup_error_strings, +}; + +VLIB_REGISTER_NODE (sfdp_lookup_ip6_node) = { + .name = "sfdp-lookup-ip6", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES, + .runtime_data = &lookup_rt_data_default, + .runtime_data_bytes = sizeof (lookup_rt_data_default), + .n_errors = ARRAY_LEN (sfdp_lookup_error_strings), + .error_strings = sfdp_lookup_error_strings, +}; + +VLIB_REGISTER_NODE (sfdp_handoff_node) = { + .name = "sfdp-handoff", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_handoff_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES, + .n_errors = ARRAY_LEN (sfdp_handoff_error_counters), + .error_counters = sfdp_handoff_error_counters, + .runtime_data = &lookup_rt_data_default, + .runtime_data_bytes = sizeof (lookup_rt_data_default), +}; diff --git a/src/vnet/sfdp/lookup/parser.c b/src/vnet/sfdp/lookup/parser.c new file mode 100644 index 00000000000..8dc8edbc9ba --- /dev/null +++ b/src/vnet/sfdp/lookup/parser.c @@ -0,0 +1,62 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#include +#include +#include +#include +static uword +sfdp_create_parser (sfdp_parser_main_t *pm, + sfdp_parser_registration_mutable_t *reg) +{ + sfdp_parser_bihash_registration_t vft = + sfdp_parser_bihash_regs[reg->key_size]; + sfdp_parser_data_t parser = { 0 }; + void **key_ptd, **kv_ptd; + uword pi = vec_len (pm->parsers); + parser.bihash_table = + clib_mem_alloc_aligned (vft.table_size, CLIB_CACHE_LINE_BYTES); + clib_memset (parser.bihash_table, 0, vft.table_size); + vft.sfdp_parser_bihash_init_fn (parser.bihash_table, reg->name, + sfdp_ip4_num_buckets (), + sfdp_ip4_mem_size ()); + vec_validate (parser.keys_ptd, vlib_num_workers ()); + vec_validate (parser.kv_ptd, vlib_num_workers ()); + vec_foreach (key_ptd, parser.keys_ptd) + key_ptd[0] = clib_mem_alloc_aligned (reg->key_size * VLIB_FRAME_SIZE, + CLIB_CACHE_LINE_BYTES); + vec_foreach (kv_ptd, parser.kv_ptd) + kv_ptd[0] = + clib_mem_alloc_aligned (reg->key_size + 8, CLIB_CACHE_LINE_BYTES); + + parser.key_size = reg->key_size; + parser.name = reg->name; + parser.format_fn = reg->format_fn; + parser.normalize_key_fn = reg->normalize_key_fn; + vec_add1 (pm->parsers, parser); + return pi; +} + +static clib_error_t * +sfdp_parser_init (vlib_main_t *vm) +{ + sfdp_parser_main_t *pm = &sfdp_parser_main; + sfdp_parser_registration_mutable_t *current_reg = pm->regs; + vlib_call_init_function (vm, sfdp_init); + uword pi; + + while (current_reg) + { + pi = sfdp_create_parser (pm, current_reg); + current_reg->sfdp_parser_data_index = pi; + current_reg = current_reg->next; + } + return 0; +} + +#ifndef CLIB_MARCH_VARIANT +sfdp_parser_main_t sfdp_parser_main; +#endif + +VLIB_INIT_FUNCTION (sfdp_parser_init); \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/parser.h b/src/vnet/sfdp/lookup/parser.h new file mode 100644 index 00000000000..832a3779ef7 --- /dev/null +++ b/src/vnet/sfdp/lookup/parser.h @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_lookup_parser_h__ +#define __included_lookup_parser_h__ +#include +#include +#include +#include + +#define SFDP_PARSER_MAX_KEY_SIZE 64 +typedef u8 calc_key_fn_t (vlib_buffer_t *b, u32 context_id, void *skey, + u64 *lookup_val, u64 *h, i16 *l4_hdr_offset, + u8 slowpath); + +typedef void normalize_key_fn_t (sfdp_session_t *session, void *result, + u8 key_idx); + +enum +{ + SFDP_PARSER_FORMAT_FUNCTION_INGRESS, + SFDP_PARSER_FORMAT_FUNCTION_EGRESS, + SFDP_PARSER_FORMAT_FUNCTION_CONTEXT, + SFDP_PARSER_N_FORMAT_FUNCTION +}; + +/* Per march parser registration structure */ +typedef struct +{ + char *name; + calc_key_fn_t *const calc_key_fn; + const uword key_size; + const uword proto_offset; + sfdp_session_type_t type; + format_function_t *format_fn[SFDP_PARSER_N_FORMAT_FUNCTION]; + normalize_key_fn_t *normalize_key_fn; + +} sfdp_parser_registration_t; + +typedef struct _sfdp_parser_registration_mutable_t +{ + struct _sfdp_parser_registration_mutable_t *next; + uword key_size; + uword sfdp_parser_data_index; + char *name; + vlib_node_registration_t *node_reg; + format_function_t *const *format_fn; + normalize_key_fn_t *normalize_key_fn; +} sfdp_parser_registration_mutable_t; + +typedef void sfdp_parser_bihash_init_fn_t (void *bihash, char *name, + u32 nbuckets, uword memory_size); +typedef int sfdp_parser_bihash_add_del_fn_t (void *bihash, void *kv, + int is_add); +typedef u64 sfdp_parser_bihash_hash_fn_t (void *kv); +typedef void sfdp_parser_bihash_prefetch_bucket_fn_t (void *bihash, u64 hash); +typedef int sfdp_parser_bihash_search_with_hash_fn_t (void *bihash, u64 hash, + void *kv_result); + +typedef int sfdp_parser_bihash_add_del_with_hash_fn_t ( + void *bihash, void *kv, u64 hash, u8 is_add, void *is_stale_cb, + void *is_stale_arg, void *overwrite_cb, void *overwrite_arg); + +/* Per march bihash vfts */ +typedef struct +{ + sfdp_parser_bihash_init_fn_t *const sfdp_parser_bihash_init_fn; + sfdp_parser_bihash_add_del_fn_t *const 
sfdp_parser_bihash_add_del_fn; + sfdp_parser_bihash_hash_fn_t *const sfdp_parser_bihash_hash_fn; + sfdp_parser_bihash_prefetch_bucket_fn_t + *const sfdp_parser_bihash_prefetch_bucket_fn; + sfdp_parser_bihash_search_with_hash_fn_t + *const sfdp_parser_bihash_search_with_hash_fn; + /* sfdp_parser_bihash_add_del_with_hash_fn_t *const + * sfdp_parser_bihash_add_del_with_hash_fn; */ + uword table_size; +} sfdp_parser_bihash_registration_t; + +typedef struct +{ + void *bihash_table; + void **keys_ptd; /* per thread vector of VLIB_FRAME_SIZE keys */ + void **kv_ptd; /* per thread vector of kv */ + uword key_size; + char *name; + format_function_t *const *format_fn; + normalize_key_fn_t *normalize_key_fn; +} sfdp_parser_data_t; + +typedef struct +{ + sfdp_parser_data_t *parsers; + sfdp_parser_registration_mutable_t *regs; + uword *parser_index_per_name; +} sfdp_parser_main_t; + +#ifndef CLIB_MARCH_VARIANT +#define SFDP_PARSER_REGISTER(x) \ + static const sfdp_parser_registration_t sfdp_parser_registration_##x; \ + sfdp_parser_registration_mutable_t sfdp_parser_registration_mutable_##x; \ + static void __sfdp_parser_registration_mutable_add_registration__##x (void) \ + __attribute__ ((__constructor__)); \ + static void __sfdp_parser_registration_mutable_add_registration__##x (void) \ + { \ + sfdp_parser_main_t *pm = &sfdp_parser_main; \ + sfdp_parser_registration_mutable_t *r = \ + &sfdp_parser_registration_mutable_##x; \ + r->next = pm->regs; \ + r->key_size = sfdp_parser_registration_##x.key_size; \ + r->name = sfdp_parser_registration_##x.name; \ + r->format_fn = sfdp_parser_registration_##x.format_fn; \ + r->normalize_key_fn = sfdp_parser_registration_##x.normalize_key_fn; \ + pm->regs = r; \ + } \ + static const sfdp_parser_registration_t sfdp_parser_registration_##x +#else +#define SFDP_PARSER_REGISTER(x) \ + extern sfdp_parser_registration_mutable_t \ + sfdp_parser_registration_mutable_##x; \ + static sfdp_parser_registration_t sfdp_parser_registration_##x +#endif + 
+extern sfdp_parser_main_t sfdp_parser_main; + +#endif /*__included_lookup_parser_h__*/ \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/parser_inlines.h b/src/vnet/sfdp/lookup/parser_inlines.h new file mode 100644 index 00000000000..d6c6f87392b --- /dev/null +++ b/src/vnet/sfdp/lookup/parser_inlines.h @@ -0,0 +1,646 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_lookup_parser_inlines_h__ +#define __included_lookup_parser_inlines_h__ +#include +#include +#include +#include +#include +#include + +#if defined(__clang__) && __clang_major__ > 17 +#undef always_inline +#define __sfdp_inline_here [[clang::always_inline]] +#else +#define __sfdp_inline_here +#endif + +#define SFDP_PARSER_BIHASH_CALL_INLINE_FN(args...) \ + ({ __sfdp_inline_here SFDP_PARSER_BIHASH_CALL_FN (args); }) + +typedef struct +{ + u32 sw_if_index; + u8 key_data[64]; + u16 parser_index; + u8 is_sp; + union + { + struct + { + u32 next_index; + u64 hash; + u32 flow_id; + }; + struct + { + u32 sp_index; + u32 sp_node_index; + }; + }; +} sfdp_parser_lookup_trace_t; + +static_always_inline u8 +sfdp_parser_lookup_four (const sfdp_parser_registration_t *reg, + vlib_buffer_t **b, void *k, u64 *lookup_val, u64 *h, + i16 *l4_hdr_offset, int prefetch_buffer_stride, + u8 slowpath) +{ + vlib_buffer_t **pb = b + prefetch_buffer_stride; + u8 slowpath_needed = 0; + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[0]); + clib_prefetch_load (pb[0]->data); + } + + __sfdp_inline_here slowpath_needed |= + reg->calc_key_fn (b[0], b[0]->flow_id, k + 0, lookup_val + 0, h + 0, + l4_hdr_offset + 0, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[1]); + clib_prefetch_load (pb[1]->data); + } + + __sfdp_inline_here slowpath_needed |= + reg->calc_key_fn (b[1], b[1]->flow_id, k + 1, lookup_val + 1, h + 1, + l4_hdr_offset + 1, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[2]); + clib_prefetch_load 
(pb[2]->data); + } + + __sfdp_inline_here slowpath_needed |= + reg->calc_key_fn (b[2], b[2]->flow_id, k + 2, lookup_val + 2, h + 2, + l4_hdr_offset + 2, slowpath); + + if (prefetch_buffer_stride) + { + clib_prefetch_load (pb[3]); + clib_prefetch_load (pb[3]->data); + } + + __sfdp_inline_here slowpath_needed |= + reg->calc_key_fn (b[3], b[3]->flow_id, k + 3, lookup_val + 3, h + 3, + l4_hdr_offset + 3, slowpath); + return slowpath_needed; +} + +static_always_inline uword +sfdp_parser_prepare_all_keys (const sfdp_parser_registration_t *reg, + vlib_buffer_t **b, sfdp_session_ip4_key_t *k, + u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left, + u8 slowpath) +{ + /* main loop - prefetch next 4 buffers, + * prefetch previous 4 buckets */ + while (n_left >= 8) + { + if (sfdp_parser_lookup_four (reg, b, k, lv, h, l4_hdr_offset, 4, + slowpath) && + !slowpath) + return n_left; + + b += 4; + k += 4; + lv += 4; + h += 4; + l4_hdr_offset += 4; + n_left -= 4; + } + + /* last 4 packets - dont prefetch next 4 buffers, + * prefetch previous 4 buckets */ + if (n_left >= 4) + { + if (sfdp_parser_lookup_four (reg, b, k, lv, h, l4_hdr_offset, 0, + slowpath) && + !slowpath) + return n_left; + + b += 4; + k += 4; + lv += 4; + h += 4; + l4_hdr_offset += 4; + n_left -= 4; + } + + while (n_left > 0) + { + __sfdp_inline_here if (reg->calc_key_fn (b[0], b[0]->flow_id, k + 0, + lv + 0, h + 0, + l4_hdr_offset + 0, slowpath) && + !slowpath) return n_left; + + b += 1; + k += 1; + lv += 1; + h += 1; + l4_hdr_offset += 1; + n_left -= 1; + } + return 0; +} + +static_always_inline void +sfdp_parser_prepare_all_keys_slow (const sfdp_parser_registration_t *reg, + vlib_buffer_t **b, + sfdp_session_ip4_key_t *k, u64 *lv, u64 *h, + i16 *l4_hdr_offset, u32 n_left) +{ + sfdp_parser_prepare_all_keys (reg, b, k, lv, h, l4_hdr_offset, n_left, 1); +} + +static_always_inline uword +sfdp_parser_prepare_all_keys_fast (const sfdp_parser_registration_t *reg, + vlib_buffer_t **b, + sfdp_session_ip4_key_t *k, u64 *lv, u64 
*h, + i16 *l4_hdr_offset, u32 n_left) +{ + return sfdp_parser_prepare_all_keys (reg, b, k, lv, h, l4_hdr_offset, n_left, + 0); +} + +static_always_inline int +sfdp_parser_create_session_inline (const sfdp_parser_registration_t *reg, + uword parser_data_index, sfdp_main_t *sfdp, + sfdp_per_thread_data_t *ptd, + sfdp_tenant_t *tenant, u16 tenant_idx, + u16 thread_index, f64 time_now, void *k, + u64 *h, u64 *lookup_val, u32 scope_index, + void *kv, const uword key_size, + void *table_bihash) +{ + u64 value; + u8 proto; + sfdp_session_t *session; + u32 session_idx; + u32 pseudo_flow_idx; + + session_idx = + sfdp_alloc_session (sfdp, ptd, thread_index != SFDP_UNBOUND_THREAD_INDEX); + + if (session_idx == ~0) + return 1; + + session = pool_elt_at_index (sfdp->sessions, session_idx); + + pseudo_flow_idx = (lookup_val[0] & 0x1) | (session_idx << 1); + value = sfdp_session_mk_table_value (thread_index, pseudo_flow_idx, + session->session_version + 1); + + clib_memcpy_fast (kv, k, key_size); + clib_memcpy_fast (kv + key_size, &value, sizeof (value)); + clib_memcpy_fast (&proto, k + reg->proto_offset, 1); + if (SFDP_PARSER_BIHASH_CALL_INLINE_FN (reg, sfdp_parser_bihash_add_del_fn, + table_bihash, kv, 2)) + { + /* colision - remote thread created same entry */ + sfdp_free_session (sfdp, ptd, session_idx); + return 2; + } + session->type = reg->type; + session->parser_index[SFDP_SESSION_KEY_PRIMARY] = parser_data_index; + session->key_flags = SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER; + + // TODO: Would be nice to do this upon free instead to have avoid having to + // check + // if the session is valid at all when checking invalidation. 
+ session->session_version += 1; + session->tenant_idx = tenant_idx; + session->state = SFDP_SESSION_STATE_FSOL; + session->owning_thread_index = thread_index; + session->scope_index = scope_index; + if (ptd) + sfdp_session_generate_and_set_id (sfdp, ptd, session); + + clib_memcpy_fast (session->bitmaps, tenant->bitmaps, + sizeof (session->bitmaps)); + clib_memcpy_fast (&session->keys_data[SFDP_SESSION_KEY_PRIMARY], k, + key_size); + + session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY] = lookup_val[0] & 0x1; + session->proto = proto; + + lookup_val[0] ^= value; + /* Bidirectional counter zeroing */ + vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP], + lookup_val[0]); + vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP], + lookup_val[0] | 0x1); + vlib_increment_simple_counter ( + &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_CREATED], + thread_index, tenant_idx, 1); + return 0; +} + +static_always_inline uword +sfdp_parser_lookup_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, + const sfdp_parser_registration_t *reg, + uword parser_data_index) +{ + sfdp_main_t *sfdp = &sfdp_main; + sfdp_parser_main_t *pm = &sfdp_parser_main; + u32 thread_index = vm->thread_index; + sfdp_per_thread_data_t *ptd = + vec_elt_at_index (sfdp->per_thread_data, thread_index); + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + const uword key_size = reg->key_size; + sfdp_parser_data_t *parser = + vec_elt_at_index (pm->parsers, parser_data_index); + void *kv = vec_elt (parser->kv_ptd, thread_index); + void *table_bihash = parser->bihash_table; + void *keys = vec_elt (parser->keys_ptd, thread_index); + void *key = keys; + sfdp_tenant_t *tenant; + sfdp_session_t *session; + + u32 session_index; + u32 *bi, *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + u32 to_local[VLIB_FRAME_SIZE], n_local = 0; + u32 to_remote[VLIB_FRAME_SIZE], n_remote = 0; + u32 to_sp[VLIB_FRAME_SIZE], n_to_sp = 0; + 
u16 thread_indices[VLIB_FRAME_SIZE]; + u16 local_next_indices[VLIB_FRAME_SIZE]; + u32 sp_indices[VLIB_FRAME_SIZE]; + u32 sp_node_indices[VLIB_FRAME_SIZE]; + vlib_buffer_t *local_bufs[VLIB_FRAME_SIZE]; + vlib_buffer_t *to_sp_bufs[VLIB_FRAME_SIZE]; + u32 local_flow_indices[VLIB_FRAME_SIZE]; + u32 created_session_indices[VLIB_FRAME_SIZE], n_created = 0; + + u64 hashes[VLIB_FRAME_SIZE], *h = hashes; + u32 lengths[VLIB_FRAME_SIZE], *len = lengths; + i16 l4_hdr_off[VLIB_FRAME_SIZE], *l4o = l4_hdr_off; + f64 time_now = vlib_time_now (vm); + /* lookup_vals contains: + * - (Phase 1) to_slow_path_node (1bit) + ||| slow_path_node_index (31bits) + * ||| zeros(31bits) + * ||| + * ||| packet_dir (1bit) + * + * - (Phase 2) session_version + thread_index + flow_index . Cf. sfdp.h + OR same as Phase 1 if slow path + ASSUMPTION: thread index < 2^31 */ + u64 __attribute__ ((aligned (32))) lookup_vals[VLIB_FRAME_SIZE], + *lv = lookup_vals; + __clib_unused u16 hit_count = 0; + uword n_left_slow_keys; + sfdp_lookup_node_runtime_data_t *rt = (void *) node->runtime_data; + u32 scope_index = rt->scope_index; + u32 fqi = + vec_elt_at_index (sfdp->frame_queue_index_per_scope, scope_index)[0]; + + vlib_get_buffers (vm, from, bufs, n_left); + b = bufs; + + if (PREDICT_FALSE ((n_left_slow_keys = sfdp_parser_prepare_all_keys_fast ( + reg, b, keys, lv, h, l4o, n_left)))) + { + uword n_done = n_left - n_left_slow_keys; + sfdp_parser_prepare_all_keys_slow ( + reg, b + n_done, keys + key_size * n_done, lv + n_done, h + n_done, + l4o + n_done, n_left_slow_keys); + } + + while (n_left) + { + if (PREDICT_TRUE (n_left > 8)) + SFDP_PARSER_BIHASH_CALL_INLINE_FN ( + reg, sfdp_parser_bihash_prefetch_bucket_fn, table_bihash, h[8]); + + if (PREDICT_TRUE (n_left > 1)) + vlib_prefetch_buffer_header (b[1], STORE); + + if (PREDICT_FALSE (lv[0] & SFDP_LV_TO_SP)) + goto next_pkt; + + clib_memcpy_fast (kv, key, key_size); + if (SFDP_PARSER_BIHASH_CALL_INLINE_FN ( + reg, sfdp_parser_bihash_search_with_hash_fn, 
table_bihash, h[0], + kv)) + { + u16 tenant_idx = sfdp_buffer (b[0])->tenant_index; + int rv; + tenant = sfdp_tenant_at_index (sfdp, tenant_idx); + rv = sfdp_parser_create_session_inline ( + reg, parser_data_index, sfdp, ptd, tenant, tenant_idx, + thread_index, time_now, key, h, lv, scope_index, kv, key_size, + table_bihash); + + if (PREDICT_FALSE (rv == 1)) + { + vlib_node_increment_counter ( + vm, node->node_index, SFDP_LOOKUP_ERROR_TABLE_OVERFLOW, 1); + lv[0] = + (u64) SFDP_SP_NODE_IP6_TABLE_OVERFLOW << 32 | SFDP_LV_TO_SP; + goto next_pkt; + } + else if (rv == 2) + { + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_COLLISION, 1); + continue; /* if there is colision, we just reiterate */ + } + created_session_indices[n_created] = + sfdp_session_index_from_lookup (lv[0]); + n_created++; + } + else + { + lv[0] ^= *(u64 *) (kv + key_size); + hit_count++; + } + + b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]); + + next_pkt: + b[0]->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID; + vnet_buffer (b[0])->l4_hdr_offset = l4o[0]; + len[0] = vlib_buffer_length_in_chain (vm, b[0]); + + b += 1; + n_left -= 1; + key += key_size; + h += 1; + lv += 1; + len += 1; + } + + // Notify created sessions + if (n_created) + { + sfdp_notify_new_sessions (sfdp, created_session_indices, n_created); + } + + n_left = frame->n_vectors; + lv = lookup_vals; + b = bufs; + bi = from; + len = lengths; + while (n_left) + { + u16 flow_thread_index; + u32 flow_index; + session_version_t session_version; + vlib_combined_counter_main_t *vcm; + + if (lv[0] & SFDP_LV_TO_SP) + { + to_sp[n_to_sp] = bi[0]; + sp_indices[n_to_sp] = (lv[0] & ~(SFDP_LV_TO_SP)) >> 32; + to_sp_bufs[n_to_sp] = b[0]; + n_to_sp++; + goto next_packet2; + } + + flow_thread_index = sfdp_thread_index_from_lookup (lv[0]); + flow_index = sfdp_pseudo_flow_index_from_lookup (lv[0]); + session_index = flow_index >> 1; + vcm = &sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP]; + session_version = 
sfdp_session_version_from_lookup (lv[0]); + vlib_increment_combined_counter (vcm, thread_index, flow_index, 1, + len[0]); + if (PREDICT_FALSE (flow_thread_index == SFDP_UNBOUND_THREAD_INDEX)) + { + flow_thread_index = thread_index; + sfdp_session_bind_to_thread (session_index, &flow_thread_index, 1); + /* flow_thread_index now necessarily contains the actual thread index + * of the session */ + } + if (flow_thread_index == thread_index) + { + /* known flow which belongs to this thread */ + to_local[n_local] = bi[0]; + local_flow_indices[n_local] = flow_index; + local_bufs[n_local] = b[0]; + n_local++; + } + else + { + /* known flow which belongs to remote thread */ + to_remote[n_remote] = bi[0]; + thread_indices[n_remote] = flow_thread_index; + /* Store the current session version in buffer to check if it's still + * valid after handoff */ + sfdp_buffer (b[0])->session_version_before_handoff = session_version; + n_remote++; + } + next_packet2: + n_left -= 1; + lv += 1; + b += 1; + bi += 1; + len += 1; + } + + /* handover buffers to remote node */ + if (n_remote) + { + u32 n_remote_enq; + n_remote_enq = vlib_buffer_enqueue_to_thread ( + vm, node, fqi, to_remote, thread_indices, n_remote, 1); + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_REMOTE, n_remote_enq); + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_CON_DROP, + n_remote - n_remote_enq); + } + + /* enqueue local */ + if (n_local) + { + u16 *current_next = local_next_indices; + u32 *local_flow_index = local_flow_indices; + uword session_scope_index; + b = local_bufs; + n_left = n_local; + + /* TODO: prefetch session and buffer + 4 loop */ + while (n_left) + { + session_index = local_flow_index[0] >> 1; + session = sfdp_session_at_index (session_index); + session_scope_index = session->scope_index; + if (PREDICT_TRUE (session_scope_index == scope_index)) + { + sfdp_bitmap_t pbmp = + session->bitmaps[sfdp_direction_from_flow_index ( + local_flow_index[0])]; + 
sfdp_buffer (b[0])->service_bitmap = pbmp; + + /* The tenant of the buffer is the tenant of the session */ + sfdp_buffer (b[0])->tenant_index = session->tenant_idx; + + sfdp_next (b[0], current_next); + } + else + current_next[0] = + SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (session_scope_index); + + local_flow_index += 1; + current_next += 1; + b += 1; + n_left -= 1; + } + vlib_buffer_enqueue_to_next (vm, node, to_local, local_next_indices, + n_local); + vlib_node_increment_counter (vm, node->node_index, + SFDP_LOOKUP_ERROR_LOCAL, n_local); + } + + if (n_to_sp) + { + vlib_frame_t *f = NULL; + u32 *current_next_slot = NULL; + u32 current_left_to_next = 0; + u32 *current_to_sp = to_sp; + u32 *sp_index = sp_indices; + u32 *sp_node_index = sp_node_indices; + u32 last_node_index = VLIB_INVALID_NODE_INDEX; + + b = to_sp_bufs; + n_left = n_to_sp; + + while (n_left) + { + u32 node_index; + u16 tenant_idx; + sfdp_tenant_t *tenant; + + tenant_idx = sfdp_buffer (b[0])->tenant_index; + tenant = sfdp_tenant_at_index (sfdp, tenant_idx); + node_index = tenant->sp_node_indices[sp_index[0]]; + sp_node_index[0] = node_index; + + if (PREDICT_FALSE (node_index != last_node_index) || + current_left_to_next == 0) + { + if (f != NULL) + vlib_put_frame_to_node (vm, last_node_index, f); + f = vlib_get_frame_to_node (vm, node_index); + f->frame_flags |= node->flags & VLIB_NODE_FLAG_TRACE; + current_next_slot = vlib_frame_vector_args (f); + current_left_to_next = VLIB_FRAME_SIZE; + last_node_index = node_index; + } + + current_next_slot[0] = current_to_sp[0]; + + f->n_vectors += 1; + current_to_sp += 1; + b += 1; + sp_index += 1; + sp_node_index += 1; + current_next_slot += 1; + + current_left_to_next -= 1; + n_left -= 1; + } + vlib_put_frame_to_node (vm, last_node_index, f); + } + + if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) + { + int i; + b = bufs; + bi = from; + h = hashes; + u32 *in_local = to_local; + u32 *in_remote = to_remote; + u32 *in_sp = to_sp; + for (i = 0; i < 
frame->n_vectors; i++) + { + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + sfdp_parser_lookup_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX]; + t->flow_id = b[0]->flow_id; + t->hash = h[0]; + t->is_sp = 0; + t->parser_index = parser_data_index; + if (bi[0] == in_local[0]) + { + t->next_index = local_next_indices[(in_local++) - to_local]; + } + else if (bi[0] == in_remote[0]) + { + t->next_index = ~0; + in_remote++; + } + else + { + t->is_sp = 1; + t->sp_index = sp_indices[in_sp - to_sp]; + t->sp_node_index = sp_node_indices[in_sp - to_sp]; + in_sp++; + } + + clib_memcpy (&t->key_data, i * key_size + keys, key_size); + + bi++; + b++; + h++; + } + else + break; + } + } + return frame->n_vectors; +} + +#ifndef CLIB_MARCH_VARIANT +#define _SFDP_PARSER_DEFINE_NODE_AUX(x) \ + static void __sfdp_parser_definition_add_name__##x (void) \ + __attribute__ ((__constructor__)); \ + static void __sfdp_parser_definition_add_name__##x (void) \ + { \ + x##_node.name = sfdp_parser_registration_##x.name; \ + sfdp_parser_registration_mutable_##x.node_reg = &x##_node; \ + } + +#else +#define _SFDP_PARSER_DEFINE_NODE_AUX(x) +#endif +#define SFDP_PARSER_DEFINE_NODE(x) \ + VLIB_REGISTER_NODE (x##_node) = { \ + .vector_size = sizeof (u32), \ + .format_trace = 0, \ + .type = VLIB_NODE_TYPE_INTERNAL, \ + .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES, \ + .runtime_data = 0, \ + .runtime_data_bytes = sizeof (u8), \ + .n_errors = ARRAY_LEN (sfdp_lookup_error_strings), \ + .error_strings = sfdp_lookup_error_strings, \ + }; \ + \ + VLIB_NODE_FN (x##_node) \ + (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) \ + { \ + return sfdp_parser_lookup_inline ( \ + vm, node, frame, &sfdp_parser_registration_##x, \ + sfdp_parser_registration_mutable_##x.sfdp_parser_data_index); \ + } \ + _SFDP_PARSER_DEFINE_NODE_AUX (x) + +#if defined(__clang__) && __clang_major__ > 17 +#if CLIB_DEBUG > 0 +#define always_inline 
static inline +#else +#define always_inline static inline __attribute__ ((__always_inline__)) +#endif +#endif + +#endif \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/reass.c b/src/vnet/sfdp/lookup/reass.c new file mode 100644 index 00000000000..a1966a2fab1 --- /dev/null +++ b/src/vnet/sfdp/lookup/reass.c @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2024 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include + +sfdp_reass_main_t sfdp_reass_main; + +static clib_error_t * +sfdp_reass_main_init (vlib_main_t *vm) +{ + sfdp_reass_main_t *vrm = &sfdp_reass_main; + vrm->ip4_sv_reass_next_index = + ip4_sv_reass_custom_context_register_next_node ( + sfdp_lookup_ip4_node.index); + vrm->ip6_sv_reass_next_index = + ip6_sv_reass_custom_context_register_next_node ( + sfdp_lookup_ip6_node.index); + vrm->ip4_full_reass_next_index = + ip4_full_reass_custom_context_register_next_node ( + sfdp_lookup_ip4_node.index); + vrm->ip6_full_reass_next_index = + ip6_full_reass_custom_context_register_next_node ( + sfdp_lookup_ip6_node.index); + vrm->ip4_full_reass_err_next_index = ip4_full_reass_get_error_next_index (); + vrm->ip6_full_reass_err_next_index = ip6_full_reass_get_error_next_index (); + return 0; +} + +void +sfdp_ip4_full_reass_custom_context_register_next_node (u16 node_index) +{ + sfdp_reass_main.ip4_full_reass_next_index = + ip4_full_reass_custom_context_register_next_node 
(node_index); +} + +void +sfdp_ip6_full_reass_custom_context_register_next_node (u16 node_index) +{ + sfdp_reass_main.ip6_full_reass_next_index = + ip6_full_reass_custom_context_register_next_node (node_index); +} + +void +sfdp_ip4_full_reass_custom_context_register_next_err_node (u16 node_index) +{ + sfdp_reass_main.ip4_full_reass_err_next_index = + ip4_full_reass_custom_context_register_next_node (node_index); +} + +void +sfdp_ip6_full_reass_custom_context_register_next_err_node (u16 node_index) +{ + sfdp_reass_main.ip6_full_reass_err_next_index = + ip6_full_reass_custom_context_register_next_node (node_index); +} + +VLIB_INIT_FUNCTION (sfdp_reass_main_init); diff --git a/src/vnet/sfdp/lookup/reass.h b/src/vnet/sfdp/lookup/reass.h new file mode 100644 index 00000000000..5e188dbc0f9 --- /dev/null +++ b/src/vnet/sfdp/lookup/reass.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#ifndef __included_lookup_reass_h__ +#define __included_lookup_reass_h__ + +#include +typedef struct +{ + /* Shallow Virtual Reassembly */ + u16 ip4_sv_reass_next_index; + u16 ip6_sv_reass_next_index; + + /* Full Reassembly */ + u16 ip4_full_reass_next_index; + u16 ip6_full_reass_next_index; + + /* Full Reassembly error next index */ + u16 ip4_full_reass_err_next_index; + u16 ip6_full_reass_err_next_index; +} sfdp_reass_main_t; +extern sfdp_reass_main_t sfdp_reass_main; +#endif \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/sfdp_bihashes.h b/src/vnet/sfdp/lookup/sfdp_bihashes.h new file mode 100644 index 00000000000..992d1b394a2 --- /dev/null +++ b/src/vnet/sfdp/lookup/sfdp_bihashes.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_lookup_sfdp_bihashes_h__ +#define __included_lookup_sfdp_bihashes_h__ +#include +#include +#include +#include +#include +#include +#define foreach_clib_bihash_kv \ + _ (24, 8) \ + _ (32, 8) \ + _ (40, 8) \ + _ (48, 8) \ + _ (56, 8) + +__attribute__ ((__unused__)) static const sfdp_parser_bihash_registration_t + sfdp_parser_bihash_regs[SFDP_PARSER_MAX_KEY_SIZE] = { +#define _(k, v) \ + [k] = { \ + .table_size = sizeof (clib_bihash_##k##_##v##_t), \ + .sfdp_parser_bihash_add_del_fn = (void *) clib_bihash_add_del_##k##_##v, \ + .sfdp_parser_bihash_hash_fn = (void *) clib_bihash_hash_##k##_##v, \ + .sfdp_parser_bihash_init_fn = (void *) clib_bihash_init_##k##_##v, \ + .sfdp_parser_bihash_prefetch_bucket_fn = \ + (void *) clib_bihash_prefetch_bucket_##k##_##v, \ + .sfdp_parser_bihash_search_with_hash_fn = \ + (void *) clib_bihash_search_inline_with_hash_##k##_##v, \ + }, + + foreach_clib_bihash_kv +#undef _ + }; + +#define SFDP_PARSER_BIHASH_CALL_FN(x, fn, args...) \ + sfdp_parser_bihash_regs[(x)->key_size].fn (args) +#endif \ No newline at end of file diff --git a/src/vnet/sfdp/lookup/sv_reass_node.c b/src/vnet/sfdp/lookup/sv_reass_node.c new file mode 100644 index 00000000000..e8d6ce81d92 --- /dev/null +++ b/src/vnet/sfdp/lookup/sv_reass_node.c @@ -0,0 +1,148 @@ +/* + * Copyright (c) 2022 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +typedef struct +{ +} sfdp_lookup_sp_sv_reass_trace_t; + +static u8 * +format_sfdp_lookup_sp_sv_reass_trace (u8 *s, va_list *args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + vlib_node_t *node = va_arg (*args, vlib_node_t *); + CLIB_UNUSED (sfdp_lookup_sp_sv_reass_trace_t * t) = + va_arg (*args, sfdp_lookup_sp_sv_reass_trace_t *); + + s = format (s, "%v: sent to svr node", node->name); + return s; +} + +#define foreach_sfdp_lookup_sp_sv_reass_next \ + _ (IP4_SVR, "ip4-sv-reassembly-custom-context") \ + _ (IP6_SVR, "ip6-sv-reassembly-custom-context") + +enum +{ +#define _(sym, str) SFDP_LOOKUP_SP_SV_REASS_NEXT_##sym, + foreach_sfdp_lookup_sp_sv_reass_next +#undef _ + SFDP_LOOKUP_SP_SV_REASS_N_NEXT +}; + +#define foreach_sfdp_lookup_sp_sv_reass_error _ (NOERROR, "No error") + +typedef enum +{ +#define _(sym, str) SFDP_LOOKUP_SP_SV_REASS_ERROR_##sym, + SFDP_LOOKUP_SP_SV_REASS_N_ERROR +#undef _ +} sfdp_lookup_sp_sv_reass_error_t; + +static char *sfdp_lookup_sp_sv_reass_error_strings[] = { +#define _(sym, str) str, + foreach_sfdp_lookup_sp_sv_reass_error +#undef _ +}; + +static_always_inline u32 +sfdp_lookup_sp_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame, bool is_ip6) +{ + sfdp_reass_main_t *vrm = &sfdp_reass_main; + vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b; + u32 aux_data[VLIB_FRAME_SIZE], *a; + u32 *from = vlib_frame_vector_args (frame); + u32 n_left = frame->n_vectors; + + vlib_get_buffers (vm, from, bufs, n_left); + b = bufs; + a = aux_data; + // TODO: prefetch + 4-loop + while (n_left) + { + a[0] = b[0]->flow_id; + if (b[0]->flags & VLIB_BUFFER_IS_TRACED) + { + sfdp_lookup_sp_sv_reass_trace_t *t = + vlib_add_trace (vm, node, b[0], sizeof (*t)); + } + + /* Save the tenant index */ + sfdp_buffer2 (b[0])->tenant_index = sfdp_buffer (b[0])->tenant_index; + sfdp_buffer2 (b[0])->flags = SFDP_BUFFER_FLAG_SV_REASSEMBLED; + + vnet_buffer 
(b[0])->ip.reass.next_index = + is_ip6 ? vrm->ip6_sv_reass_next_index : vrm->ip4_sv_reass_next_index; + b += 1; + a += 1; + n_left -= 1; + } + vlib_buffer_enqueue_to_single_next_with_aux ( + vm, node, from, aux_data, + is_ip6 ? SFDP_LOOKUP_SP_SV_REASS_NEXT_IP6_SVR : + SFDP_LOOKUP_SP_SV_REASS_NEXT_IP4_SVR, + frame->n_vectors); + return frame->n_vectors; +} + +VLIB_NODE_FN (sfdp_lookup_ip4_sp_sv_reass) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_sp_sv_reass_inline (vm, node, frame, 0); +} + +VLIB_REGISTER_NODE (sfdp_lookup_ip4_sp_sv_reass) = { + .name = "sfdp-lookup-ip4-sp-sv-reass", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_sp_sv_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_lookup_sp_sv_reass_error_strings), + .error_strings = sfdp_lookup_sp_sv_reass_error_strings, + .next_nodes = { +#define _(sym, str) [SFDP_LOOKUP_SP_SV_REASS_NEXT_##sym] = str, + foreach_sfdp_lookup_sp_sv_reass_next +#undef _ + }, + .n_next_nodes = SFDP_LOOKUP_SP_SV_REASS_N_NEXT, +}; + +VLIB_NODE_FN (sfdp_lookup_ip6_sp_sv_reass) +(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame) +{ + return sfdp_lookup_sp_sv_reass_inline (vm, node, frame, 1); +} + +VLIB_REGISTER_NODE (sfdp_lookup_ip6_sp_sv_reass) = { + .name = "sfdp-lookup-ip6-sp-sv-reass", + .vector_size = sizeof (u32), + .format_trace = format_sfdp_lookup_sp_sv_reass_trace, + .type = VLIB_NODE_TYPE_INTERNAL, + + .n_errors = ARRAY_LEN (sfdp_lookup_sp_sv_reass_error_strings), + .error_strings = sfdp_lookup_sp_sv_reass_error_strings, + .next_nodes = { +#define _(sym, str) [SFDP_LOOKUP_SP_SV_REASS_NEXT_##sym] = str, + foreach_sfdp_lookup_sp_sv_reass_next +#undef _ + }, + .n_next_nodes = SFDP_LOOKUP_SP_SV_REASS_N_NEXT, +}; diff --git a/src/vnet/sfdp/service.c b/src/vnet/sfdp/service.c new file mode 100644 index 00000000000..d6547154140 --- /dev/null +++ b/src/vnet/sfdp/service.c @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: 
Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#include +#include +#include +#include +#include +#include + +static sfdp_service_registration_t ** +sfdp_service_init_for_scope (vlib_main_t *vm, + sfdp_service_registration_t **services, + uword *index_reg_by_name, + uword **service_index_by_name) +{ + sfdp_service_registration_t *current_reg; + sfdp_service_registration_t **res_services = 0; + u8 **runs_after_table = 0; + u8 **closure = 0; + uword *ordered_indices = 0; + uword current_index = vec_len (services); + + /* Build the constraints matrix */ + if (current_index == 0) + return res_services; + current_reg = services[0]; + runs_after_table = clib_ptclosure_alloc (current_index); + + while (current_index > 0) + { + char **current_target; + current_index--; + current_reg = vec_elt_at_index (services, current_index)[0]; + + /* Process runs_before and runs_after constraints */ + current_target = current_reg->runs_before; + while (current_target[0]) + { + uword *res = hash_get_mem (index_reg_by_name, current_target[0]); + if (res) + runs_after_table[res[0]][current_index] = 1; + current_target++; + } + current_target = current_reg->runs_after; + while (current_target[0]) + { + uword *res = hash_get_mem (index_reg_by_name, current_target[0]); + if (res) + runs_after_table[current_index][res[0]] = 1; + current_target++; + } + } + /*hash_free (index_reg_by_name);*/ + closure = clib_ptclosure (runs_after_table); +again: + for (int i = 0; i < vec_len (services); i++) + { + for (int j = 0; j < vec_len (services); j++) + { + if (closure[i][j]) + { + /* i runs after j so it can't be output */ + goto skip_i; + } + } + /* i doesn't run after any pending element so it can be output */ + vec_add1 (ordered_indices, i); + for (int j = 0; j < vec_len (services); j++) + closure[j][i] = 0; + closure[i][i] = 1; + goto again; + skip_i:; + } + if (vec_len (services) != vec_len (ordered_indices)) + clib_panic ("Failed to build total order for sfdp services"); + 
clib_ptclosure_free (runs_after_table); + clib_ptclosure_free (closure); + + vec_resize (res_services, vec_len (services)); + for (uword i = 0; i < vec_len (ordered_indices); i++) + { + current_reg = vec_elt_at_index (services, ordered_indices[i])[0]; + *current_reg->index_in_bitmap = i; + *current_reg->service_mask = 1ULL << i; + res_services[i] = current_reg; + hash_set_mem (*service_index_by_name, current_reg->node_name, i); + } + /*sm->service_index_by_name = service_index_by_name;*/ + /*vec_free (services);*/ + vec_free (ordered_indices); + + /* Build the graph */ + services = res_services; + for (uword i = 0; i < vec_len (services); i++) + { + sfdp_service_registration_t *reg_i = vec_elt_at_index (services, i)[0]; + vlib_node_t *node_i = + vlib_get_node_by_name (vm, (u8 *) reg_i->node_name); + if (node_i == 0) + continue; + if (reg_i->is_terminal) + continue; + sfdp_service_next_indices_init (vm, node_i->index, services); + } + return res_services; +} + +static void +sfdp_service_init_parser_node_for_scope ( + vlib_main_t *vm, vlib_node_registration_t *original_reg, + sfdp_service_registration_t **services, u32 scope_index, + const char *scope_name) +{ + sfdp_main_t *sfdp = &sfdp_main; + uword *parser_node_index_per_scope; + vlib_node_registration_t r; + sfdp_lookup_node_runtime_data_t rt = { .scope_index = scope_index }; + + uword original_node_index; + uword node_index; + + original_node_index = original_reg->index; + vec_validate (sfdp->parser_node_index_per_scope_per_original, + original_node_index); + parser_node_index_per_scope = vec_elt_at_index ( + sfdp->parser_node_index_per_scope_per_original, original_node_index)[0]; + vec_validate (parser_node_index_per_scope, scope_index); + if (scope_index != 0) + { + clib_memset (&r, 0, sizeof (r)); + r.vector_size = sizeof (u32); + r.format_trace = original_reg->format_trace; + r.type = VLIB_NODE_TYPE_INTERNAL; + r.runtime_data = &rt; + r.runtime_data_bytes = sizeof (rt); + r.n_errors = original_reg->n_errors; 
+ r.error_strings = original_reg->error_strings; + r.error_counters = original_reg->error_counters; + r.node_fn_registrations = original_reg->node_fn_registrations; + r.flags = original_reg->flags; + node_index = + vlib_register_node (vm, &r, "%s-%s", original_reg->name, scope_name); + } + else + node_index = original_node_index; + + parser_node_index_per_scope[scope_index] = node_index; + sfdp->parser_node_index_per_scope_per_original[original_node_index] = + parser_node_index_per_scope; + sfdp_service_next_indices_init (vm, node_index, services); +} + +static void +sfdp_service_init_nodes_for_scope (vlib_main_t *vm, u32 scope_index) +{ + sfdp_service_main_t *sm = &sfdp_service_main; + sfdp_main_t *sfdp = &sfdp_main; + sfdp_parser_main_t *pm = &sfdp_parser_main; + + const char *scope_name = vec_elt_at_index (sm->scope_names, scope_index)[0]; + vlib_node_registration_t r; + sfdp_service_registration_t **services = + vec_elt_at_index (sm->services_per_scope_index, scope_index)[0]; + uword node_index; + sfdp_lookup_node_runtime_data_t rt = { .scope_index = scope_index }; + sfdp_parser_registration_mutable_t *preg = pm->regs; + +#define _(n, s, x) \ + if (scope_index != 0) \ + { \ + clib_memset (&r, 0, sizeof (r)); \ + r.vector_size = sizeof (u32); \ + r.format_trace = (n).format_trace; \ + r.type = VLIB_NODE_TYPE_INTERNAL; \ + r.runtime_data = &rt; \ + r.runtime_data_bytes = sizeof (rt); \ + r.n_errors = (n).n_errors; \ + r.error_strings = (n).error_strings; \ + r.error_counters = (n).error_counters; \ + r.node_fn_registrations = (n).node_fn_registrations; \ + r.flags = (n).flags; \ + node_index = vlib_register_node (vm, &r, s "-%s", scope_name); \ + vec_validate (sfdp->x##_node_index_per_scope, scope_index); \ + sfdp->x##_node_index_per_scope[scope_index] = node_index; \ + } \ + else \ + { \ + node_index = (n).index; \ + vec_validate (sfdp->x##_node_index_per_scope, scope_index); \ + sfdp->x##_node_index_per_scope[scope_index] = node_index; \ + } \ + \ + 
sfdp_service_next_indices_init (vm, node_index, services); + + _ (sfdp_lookup_ip4_node, "sfdp-lookup-ip4", ip4_lookup) + _ (sfdp_lookup_ip6_node, "sfdp-lookup-ip6", ip6_lookup) + _ (sfdp_handoff_node, "sfdp-handoff", handoff) +#undef _ + vec_validate (sfdp->frame_queue_index_per_scope, scope_index); + + /* Last node index is handoff node */ + sfdp->frame_queue_index_per_scope[scope_index] = + vlib_frame_queue_main_init (node_index, 0); + + /* Same work for all parser nodes */ + while (preg) + { + if (preg->node_reg) + sfdp_service_init_parser_node_for_scope (vm, preg->node_reg, services, + scope_index, scope_name); + preg = preg->next; + } +} + +static clib_error_t * +sfdp_service_init (vlib_main_t *vm) +{ + sfdp_service_main_t *sm = &sfdp_service_main; + sfdp_service_registration_t ***services_per_scope_index = 0; + sfdp_service_registration_t *current_reg; + uword *index_reg_by_name = hash_create_string (0, sizeof (uword)); + uword *service_index_by_name = hash_create_string (0, sizeof (uword)); + uword *scope_index_by_name = hash_create_string (0, sizeof (uword)); + u32 n_scopes = 1; + const char **scope_names = 0; + + vec_validate (services_per_scope_index, 0); + vec_validate (scope_names, 0); + scope_names[0] = "default"; + + current_reg = sm->next_service; + + vlib_call_init_function (vm, sfdp_parser_init); + /* Parse the registrations linked list */ + while (current_reg) + { + sfdp_service_registration_t **services; + uword *si; + u32 scope_index; + const char *name = current_reg->node_name; + const char *scope = current_reg->scope; + uword *res = hash_get_mem (index_reg_by_name, name); + uword current_index; + + if (res) + clib_panic ("Trying to register %s twice!", name); + + /* Scope already exists ? 
*/ + if (scope == 0) + scope_index = 0; + else if ((si = hash_get_mem (scope_index_by_name, scope)) == 0) + { + /* Create scope */ + scope_index = n_scopes; + n_scopes += 1; + hash_set_mem (scope_index_by_name, scope, scope_index); + vec_validate (scope_names, scope_index); + scope_names[scope_index] = scope; + } + else + scope_index = *si; + + vec_validate (services_per_scope_index, scope_index); + + services = *vec_elt_at_index (services_per_scope_index, scope_index); + current_index = vec_len (services); + vec_add1 (services, current_reg); + services_per_scope_index[scope_index] = services; + hash_set_mem (index_reg_by_name, name, current_index); + current_reg = current_reg->next; + } + + /* Initialise each scope */ + for (int i = 0; i < n_scopes; i++) + { + sfdp_service_registration_t **res_services; + res_services = sfdp_service_init_for_scope ( + vm, services_per_scope_index[i], index_reg_by_name, + &service_index_by_name); + vec_free (services_per_scope_index[i]); + services_per_scope_index[i] = res_services; + } + sm->scope_names = scope_names; + sm->scope_index_by_name = scope_index_by_name; + sm->n_scopes = n_scopes; + sm->service_index_by_name = service_index_by_name; + sm->services_per_scope_index = services_per_scope_index; + hash_free (index_reg_by_name); + + /* Create the lookup nodes for each scope */ + for (int i = 0; i < n_scopes; i++) + sfdp_service_init_nodes_for_scope (vm, i); + + /* Connect lookup nodes to handoff nodes of other scopes */ + for (int i = 0; i < n_scopes; i++) + for (int j = 0; j < n_scopes; j++) + { + uword from_ni_v4, from_ni_v6, from_ni_hoff, from_ni_parser, to_ni; + uword **parser_node_index_per_scope; + from_ni_v4 = sfdp_main.ip4_lookup_node_index_per_scope[i]; + from_ni_v6 = sfdp_main.ip6_lookup_node_index_per_scope[i]; + from_ni_hoff = sfdp_main.handoff_node_index_per_scope[i]; + to_ni = sfdp_main.handoff_node_index_per_scope[j]; + + if (i == j) + continue; + vlib_node_add_next_with_slot (vm, from_ni_v4, to_ni, + 
SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j)); + vlib_node_add_next_with_slot (vm, from_ni_v6, to_ni, + SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j)); + vlib_node_add_next_with_slot (vm, from_ni_hoff, to_ni, + SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j)); + + /* Connect each parser_node for scope i to handoff of scope j. + * The vec_foreach iterator points at an element of the outer vector, + * so the per-scope vector that must be bounds-checked is + * parser_node_index_per_scope[0], not the iterator itself. */ + vec_foreach (parser_node_index_per_scope, + sfdp_main.parser_node_index_per_scope_per_original) + if (vec_len (parser_node_index_per_scope[0]) > i) + { + from_ni_parser = parser_node_index_per_scope[0][i]; + vlib_node_add_next_with_slot ( + vm, from_ni_parser, to_ni, + SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j)); + } + } + + vlib_node_main_lazy_next_update (vm); + return 0; +} + +/* Add every service of the services vector whose node exists in the graph + * as a next node of node_index, in the slot given by the registration + * index_in_bitmap. Services whose node is not found are skipped. */ +void +sfdp_service_next_indices_init (vlib_main_t *vm, uword node_index, + sfdp_service_registration_t **services) +{ + for (uword i = 0; i < vec_len (services); i++) + { + sfdp_service_registration_t *reg = vec_elt_at_index (services, i)[0]; + vlib_node_t *node = vlib_get_node_by_name (vm, (u8 *) reg->node_name); + if (node) + vlib_node_add_next_with_slot (vm, node_index, node->index, + *reg->index_in_bitmap); + } +} + +VLIB_INIT_FUNCTION (sfdp_service_init); +sfdp_service_main_t sfdp_service_main; \ No newline at end of file diff --git a/src/vnet/sfdp/service.h b/src/vnet/sfdp/service.h new file mode 100644 index 00000000000..a2035735c0d --- /dev/null +++ b/src/vnet/sfdp/service.h @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_sfdp_service_h__ +#define __included_sfdp_service_h__ +#include +#include + +typedef struct _sfdp_service_registration_t +{ + struct _sfdp_service_registration_t *next; + const char *node_name; + const char *scope; + char **runs_before; + char **runs_after; + u8 *index_in_bitmap; + sfdp_bitmap_t *service_mask; + u8 is_terminal; +} sfdp_service_registration_t; + +typedef struct +{ + sfdp_service_registration_t *next_service; + sfdp_service_registration_t ***services_per_scope_index; + uword *scope_index_by_name; + const char **scope_names; + uword n_scopes; + uword *service_index_by_name; +} sfdp_service_main_t; + +extern sfdp_service_main_t sfdp_service_main; + +#define SFDP_SERVICE_DECLARE(x) \ + extern u8 sfdp_service_index_in_bitmap_##x; \ + extern sfdp_bitmap_t sfdp_service_mask_##x; + +#define SFDP_SERVICE_MASK(x) sfdp_service_mask_##x +#define SFDP_SERVICE_INDEX(x) sfdp_service_index_in_bitmap_##x + +#ifndef CLIB_MARCH_VARIANT +#define SFDP_SERVICE_DEFINE(x) \ + static sfdp_service_registration_t sfdp_service_registration_##x; \ + static void __sfdp_service_add_registration_##x (void) \ + __attribute__ ((__constructor__)); \ + u8 sfdp_service_index_in_bitmap_##x; \ + sfdp_bitmap_t sfdp_service_mask_##x; \ + static void __sfdp_service_add_registration_##x (void) \ + { \ + sfdp_service_main_t *sm = &sfdp_service_main; \ + sfdp_service_registration_t *r = &sfdp_service_registration_##x; \ + r->next = sm->next_service; \ + sm->next_service = r; \ + r->index_in_bitmap = &sfdp_service_index_in_bitmap_##x; \ + r->service_mask = &sfdp_service_mask_##x; \ + } \ + static sfdp_service_registration_t sfdp_service_registration_##x +#else +#define SFDP_SERVICE_DEFINE(x) \ + SFDP_SERVICE_DECLARE (x); \ + static sfdp_service_registration_t __clib_unused \ + unused_sfdp_service_registration_##x + +#endif + +#define SFDP_SERVICES(...) 
\ + (char *[]) \ + { \ + __VA_ARGS__, 0 \ + } + +/* Pop the lowest set bit of the buffer service bitmap: its zero-based + * position is stored in *next_index and the bit is cleared from the + * bitmap. The bitmap is expected to be non-empty (checked by ASSERT). */ +static_always_inline void +sfdp_next (vlib_buffer_t *b, u16 *next_index) +{ + sfdp_bitmap_t bmp = sfdp_buffer (b)->service_bitmap; + u8 first = __builtin_ffsll (bmp); + ASSERT (first != 0); + *next_index = (first - 1); + sfdp_buffer (b)->service_bitmap ^= 1ULL << (first - 1); +} + +/* Iterate s_var over every scope index, from 0 up to + * sfdp_service_main.n_scopes - 1. */ +#define foreach_sfdp_scope_index(s_var) \ + for (s_var = 0; s_var < sfdp_service_main.n_scopes; s_var++) +void sfdp_service_next_indices_init (vlib_main_t *vm, uword node_index, + sfdp_service_registration_t **services); + +/* Return the index stored under name in + * sfdp_service_main.service_index_by_name; panics when the name is not + * present in the hash. The uword value is narrowed to u8 on return. */ +static_always_inline u8 +sfdp_get_service_index_by_name (const char *name) +{ + sfdp_service_main_t *sm = &sfdp_service_main; + uword *res = hash_get_mem (sm->service_index_by_name, name); + if (res == NULL) + { + clib_panic ("Unknown service name '%s'", name); + } + return *res; +} + +#endif //__included_sfdp_service_h__ \ No newline at end of file diff --git a/src/vnet/sfdp/sfdp.api b/src/vnet/sfdp/sfdp.api new file mode 100644 index 00000000000..1e4bf29e780 --- /dev/null +++ b/src/vnet/sfdp/sfdp.api @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +option version = "0.0.1"; +import "vnet/ip/ip_types.api"; +import "vnet/sfdp/sfdp_types.api"; + +/** \brief + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param tenant_id - tenant id to add + @param context_id - context id for this tenant (optional, default to same as tenant id) + @param is_del +*/ + +autoreply define sfdp_tenant_add_del +{ + u32 client_index; + u32 context; + + u32 tenant_id; + u32 context_id [default=0xffffffff]; + bool is_del; +}; + +autoreply define sfdp_set_services +{ + u32 client_index; + u32 context; + + u32 tenant_id; + vl_api_sfdp_session_direction_t dir; + u8 n_services; + vl_api_sfdp_service_name_t services[n_services]; +}; + +define sfdp_session_dump +{ + u32 client_index; + u32 context; +}; + +define sfdp_session_details +{ + u32 context; + + u64 session_id; + u32 thread_index; + u32 tenant_id; + u32 session_idx; + vl_api_sfdp_session_type_t session_type; + vl_api_ip_proto_t protocol; + vl_api_sfdp_session_state_t state; + f64 remaining_time; +/* Avoid service strings to keep it compact */ + u64 forward_bitmap; + u64 reverse_bitmap; + u8 n_keys; + vl_api_sfdp_session_key_t keys[n_keys]; +}; + +define sfdp_tenant_dump +{ + u32 client_index; + u32 context; +}; + +define sfdp_tenant_details +{ + u32 context; + + u32 index; + u32 context_id; + u64 forward_bitmap; + u64 reverse_bitmap; + u32 n_timeout; + u32 timeout[n_timeout]; +}; + +autoreply define sfdp_set_timeout +{ + u32 client_index; + u32 context; + + u32 tenant_id; + u32 timeout_id; + u32 timeout_value; +}; + +autoreply define sfdp_set_sp_node +{ + u32 client_index; + u32 context; + + u32 tenant_id; + vl_api_sfdp_sp_node_t sp_node; + u32 node_index; +}; + +autoreply define sfdp_set_icmp_error_node +{ + u32 client_index; + u32 context; + + u32 tenant_id; + bool is_ip6; + u32 node_index; +}; + +/* + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sfdp/sfdp.c 
b/src/vnet/sfdp/sfdp.c new file mode 100644 index 00000000000..8d24306d55a --- /dev/null +++ b/src/vnet/sfdp/sfdp.c @@ -0,0 +1,550 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +#define _GNU_SOURCE +#include + +#include +/* + * Not needed because instantiated in ip6_fib.c + * #include + */ +#undef __included_bihash_template_inlines_h__ +#include + +#include +#include + +#include +/* + * Not needed because instantiated in ip6_forward.c + * #include + */ +#undef __included_bihash_template_inlines_h__ +#include + +#include +/* + * Not needed because instantiated in session_lookup.c + * #include + */ +#undef __included_bihash_template_inlines_h__ +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#define SFDP_DEFAULT_BITMAP SFDP_SERVICE_MASK (drop) + +SFDP_SERVICE_DECLARE (drop) + +sfdp_main_t sfdp_main; + +/* Name every per-session (flow) combined counter, set its stats-segment + * path and validate it up to index 1 << (log2_sessions + 1). The macro + * body is brace-scoped so that each counter gets its own name locals. */ +static void +sfdp_init_ptd_counters (void) +{ + sfdp_main_t *sfdp = &sfdp_main; +#define _(x, y) \ + { \ + u8 *name = format (0, y "%c", 0); \ + u8 *stat_seg_name = format (0, "/sfdp/per_flow_counters/" y "%c", 0); \ + sfdp->per_session_ctr[SFDP_FLOW_COUNTER_##x].name = (char *) name; \ + sfdp->per_session_ctr[SFDP_FLOW_COUNTER_##x].stat_segment_name = \ + (char *) stat_seg_name; \ + vlib_validate_combined_counter ( \ + &sfdp->per_session_ctr[SFDP_FLOW_COUNTER_##x], \ + 1ULL << (sfdp->log2_sessions + 1)); \ + } + + foreach_sfdp_flow_counter +#undef _ +} + +static void +sfdp_init_tenant_counters (sfdp_main_t *sfdp) +{ +#define _(x, y, z) \ + sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x].name = y; \ + sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x] \ + .stat_segment_name = "/sfdp/per_tenant_counters/" y; \ + vlib_validate_simple_counter ( \ + &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x], \ + 1ULL << (1 + sfdp->log2_tenants)); + + foreach_sfdp_tenant_session_counter +#undef _ +#define _(x, y, z) \ + 
sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].name = y; \ + sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].stat_segment_name = \ + "/sfdp/per_tenant_counters/" y; \ + vlib_validate_combined_counter ( \ + &sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x], \ + 1ULL << (1 + sfdp->log2_tenants)); + + foreach_sfdp_tenant_data_counter +#undef _ +} + +static void +sfdp_init_main_if_needed (sfdp_main_t *sfdp) +{ + static u32 done = 0; + vlib_thread_main_t *tm = vlib_get_thread_main (); + if (done) + return; + time_t epoch = time (NULL); + uword log_n_thread = max_log2 (tm->n_vlib_mains); + uword template_shift = + SFDP_SESSION_ID_TOTAL_BITS - SFDP_SESSION_ID_EPOCH_N_BITS - log_n_thread; + sfdp->session_id_ctr_mask = (((u64) 1 << template_shift) - 1); + /* initialize per-thread data */ + vec_validate (sfdp->per_thread_data, tm->n_vlib_mains - 1); + for (int i = 0; i < tm->n_vlib_mains; i++) + { + sfdp_per_thread_data_t *ptd = + vec_elt_at_index (sfdp->per_thread_data, i); + ptd->expired_sessions = 0; + ptd->session_id_template = (u64) epoch + << (template_shift + log_n_thread); + ptd->session_id_template |= (u64) i << template_shift; + ptd->session_freelist = 0; + } + if (vlib_num_workers ()) + clib_spinlock_init (&sfdp->session_lock); + + pool_init_fixed (sfdp->sessions, sfdp_num_sessions ()); + sfdp->free_sessions = sfdp_num_sessions (); + sfdp_init_ptd_counters (); + pool_init_fixed (sfdp->tenants, 1ULL << sfdp->log2_tenants); + + sfdp_init_tenant_counters (sfdp); + + clib_bihash_init_24_8 (&sfdp->table4, "sfdp ipv4 session table", + sfdp_ip4_num_buckets (), sfdp_ip4_mem_size ()); + clib_bihash_init_48_8 (&sfdp->table6, "sfdp ipv6 session table", + sfdp_ip6_num_buckets (), sfdp_ip6_mem_size ()); + clib_bihash_init_8_8 (&sfdp->tenant_idx_by_id, "sfdp tenant table", + sfdp_tenant_num_buckets (), sfdp_tenant_mem_size ()); + clib_bihash_init_8_8 (&sfdp->session_index_by_id, "session idx by id", + sfdp_ip4_num_buckets (), sfdp_ip4_mem_size ()); + + 
sfdp->icmp4_error_frame_queue_index = + vlib_frame_queue_main_init (sfdp_lookup_ip4_icmp_node.index, 0); + sfdp->icmp6_error_frame_queue_index = + vlib_frame_queue_main_init (sfdp_lookup_ip6_icmp_node.index, 0); + + /* User timer as default if no other has been registered yet. */ + if (!sfdp->expiry_callbacks.expire_or_evict_sessions) + { + sfdp_timer_register_as_expiry_module (); + } + + done = 1; +} + +static clib_error_t * +sfdp_init (vlib_main_t *vm) +{ + sfdp_main_t *sfdp = &sfdp_main; + clib_error_t *err; +#define _(val, default) sfdp->val = sfdp->val ? sfdp->val : default; + + _ (log2_sessions, SFDP_DEFAULT_LOG2_SESSIONS) + _ (log2_sessions_cache_per_thread, + SFDP_DEFAULT_LOG2_SESSIONS - SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO) + _ (log2_tenants, SFDP_DEFAULT_LOG2_TENANTS) +#undef _ + sfdp->no_main = sfdp->no_main && vlib_num_workers (); + + /* sfdp->eviction_sessions_margin came from early_config */ + if ((err = sfdp_set_eviction_sessions_margin ( + sfdp->eviction_sessions_margin)) != 0) + return err; + + // vlib_call_init_function (vm, sfdp_service_init); + return 0; +} + +void +sfdp_tenant_clear_counters (sfdp_main_t *sfdp, u32 tenant_idx) +{ +#define _(x, y, z) \ + sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x].name = y; \ + sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x] \ + .stat_segment_name = "/sfdp/per_tenant_counters/" y; \ + vlib_zero_simple_counter ( \ + &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x], tenant_idx); + + foreach_sfdp_tenant_session_counter +#undef _ +#define _(x, y, z) \ + sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].name = y; \ + sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].stat_segment_name = \ + "/sfdp/per_tenant_counters/" y; \ + vlib_zero_combined_counter ( \ + &sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x], tenant_idx); + + foreach_sfdp_tenant_data_counter +#undef _ +} + +static void +sfdp_tenant_init_timeouts (sfdp_tenant_t *tenant) +{ + for (u32 idx = 0; idx < 
SFDP_MAX_TIMEOUTS; idx++) + { + tenant->timeouts[idx] = sfdp_main.timeouts[idx].val; + } +} + +static void +sfdp_tenant_init_sp_nodes (sfdp_tenant_t *tenant) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_node_t *node; + +#define _(sym, default, str) \ + node = vlib_get_node_by_name (vm, (u8 *) (default)); \ + tenant->sp_node_indices[SFDP_SP_NODE_##sym] = node->index; + + foreach_sfdp_sp_node +#undef _ +} + +clib_error_t * +sfdp_tenant_add_del (sfdp_main_t *sfdp, u32 tenant_id, u32 context_id, + u8 is_del) +{ + sfdp_init_main_if_needed (sfdp); + clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 }; + clib_error_t *err = 0; + sfdp_tenant_t *tenant; + u32 tenant_idx; + u32 n_tenants = pool_elts (sfdp->tenants); + if (!is_del) + { + if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv)) + { + pool_get (sfdp->tenants, tenant); + tenant_idx = tenant - sfdp->tenants; + tenant->bitmaps[SFDP_FLOW_FORWARD] = SFDP_DEFAULT_BITMAP; + tenant->bitmaps[SFDP_FLOW_REVERSE] = SFDP_DEFAULT_BITMAP; + tenant->tenant_id = tenant_id; + tenant->context_id = context_id; + sfdp_tenant_init_timeouts (tenant); + sfdp_tenant_init_sp_nodes (tenant); + kv.key = tenant_id; + kv.value = tenant_idx; + clib_bihash_add_del_8_8 (&sfdp->tenant_idx_by_id, &kv, 1); + sfdp_tenant_clear_counters (sfdp, tenant_idx); + } + else + { + err = clib_error_return (0, + "Can't create tenant with id %d" + " (already exists with index %d)", + tenant_id, kv.value); + } + } + else + { + if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv)) + { + err = clib_error_return (0, + "Can't delete tenant with id %d" + " (not found)", + tenant_id); + } + else + { + sfdp_tenant_clear_counters (sfdp, kv.value); + pool_put_index (sfdp->tenants, kv.value); + clib_bihash_add_del_8_8 (&sfdp->tenant_idx_by_id, &kv, 0); + /* TODO: Notify other users of "tenants" (like gw)? + * maybe cb list? 
*/ + } + } + if (!err && ((n_tenants == 1 && is_del) || (n_tenants == 0 && !is_del))) + sfdp_enable_disable_expiry (is_del); + return err; +} + +clib_error_t * +sfdp_set_services (sfdp_main_t *sfdp, u32 tenant_id, sfdp_bitmap_t bitmap, + u8 direction) +{ + sfdp_init_main_if_needed (sfdp); + clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 }; + sfdp_tenant_t *tenant; + if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv)) + return clib_error_return ( + 0, "Can't assign service map: tenant id %d not found", tenant_id); + + tenant = sfdp_tenant_at_index (sfdp, kv.value); + tenant->bitmaps[direction] = bitmap; + return 0; +} + +clib_error_t * +sfdp_set_timeout (sfdp_main_t *sfdp, u32 tenant_id, u32 timeout_idx, + u32 timeout_val) +{ + sfdp_init_main_if_needed (sfdp); + clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 }; + sfdp_tenant_t *tenant; + if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv)) + return clib_error_return ( + 0, "Can't configure timeout: tenant id %d not found", tenant_id); + tenant = sfdp_tenant_at_index (sfdp, kv.value); + tenant->timeouts[timeout_idx] = timeout_val; + return 0; +} + +clib_error_t * +sfdp_set_sp_node (sfdp_main_t *sfdp, u32 tenant_id, u32 sp_index, + u32 node_index) +{ + sfdp_init_main_if_needed (sfdp); + clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 }; + sfdp_tenant_t *tenant; + if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv)) + return clib_error_return ( + 0, "Can't configure slow path node: tenant id %d not found", tenant_id); + tenant = sfdp_tenant_at_index (sfdp, kv.value); + tenant->sp_node_indices[sp_index] = node_index; + return 0; +} + +clib_error_t * +sfdp_set_icmp_error_node (sfdp_main_t *sfdp, u32 tenant_id, u8 is_ip6, + u32 node_index) +{ + sfdp_init_main_if_needed (sfdp); + vlib_main_t *vm = vlib_get_main (); + clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 }; + sfdp_tenant_t *tenant; + uword next_index; + if (clib_bihash_search_inline_8_8 
(&sfdp->tenant_idx_by_id, &kv)) + return clib_error_return ( + 0, "Can't configure icmp error node: tenant id %d not found", tenant_id); + tenant = sfdp_tenant_at_index (sfdp, kv.value); + if (is_ip6) + { + next_index = + vlib_node_add_next (vm, sfdp_lookup_ip6_icmp_node.index, node_index); + tenant->icmp6_lookup_next = next_index; + } + else + { + next_index = + vlib_node_add_next (vm, sfdp_lookup_ip4_icmp_node.index, node_index); + tenant->icmp4_lookup_next = next_index; + } + return 0; +} + +int +sfdp_create_session (vlib_main_t *vm, vlib_buffer_t *b, u32 context_id, + u32 thread_index, u32 tenant_index, u32 *session_index, + int is_ipv6) +{ + return sfdp_create_session_with_scope_index ( + vm, b, context_id, thread_index, tenant_index, session_index, 0, is_ipv6); +} + +int +sfdp_create_session_with_scope_index (vlib_main_t *vm, vlib_buffer_t *b, + u32 context_id, u32 thread_index, + u32 tenant_index, u32 *session_index, + u32 scope_index, int is_ipv6) +{ + sfdp_main_t *sfdp = &sfdp_main; + sfdp_session_ip4_key_t k4 = {}; + sfdp_session_ip6_key_t k6 = {}; + void *k = is_ipv6 ? 
(void *) &k6 : (void *) &k4; + u64 lookup_val = 0, h = 0; + i16 l4_hdr_offset = 0; + u8 slow_path = 0; + sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, tenant_index); + sfdp_per_thread_data_t *ptd = 0; + f64 time_now = vlib_time_now (vm); + u8 bound_to_thread = (u16) thread_index != SFDP_UNBOUND_THREAD_INDEX; + + if (bound_to_thread) + ptd = vec_elt_at_index (sfdp->per_thread_data, thread_index); + + if (is_ipv6) + { + sfdp_calc_key_v6 (b, context_id, k, &lookup_val, &h, &l4_hdr_offset, + slow_path); + } + else + { + sfdp_calc_key_v4 (b, context_id, k, &lookup_val, &h, &l4_hdr_offset, + slow_path); + } + int err = sfdp_create_session_inline (sfdp, ptd, tenant, tenant_index, + thread_index, time_now, k, &h, + &lookup_val, scope_index, is_ipv6); + + if (bound_to_thread && err == 0) + { + *session_index = sfdp_session_index_from_lookup (lookup_val); + sfdp_notify_new_sessions (sfdp, session_index, 1); + } + return err; +} + +void +sfdp_normalise_ip4_key (sfdp_session_t *session, + sfdp_session_ip4_key_t *result, u8 key_idx) +{ + sfdp_session_ip4_key_t *skey = &session->keys[key_idx].key4; + sfdp_ip4_key_t *key = &skey->ip4_key; + u8 pseudo_dir = session->pseudo_dir[key_idx]; + u8 proto = session->proto; + u8 with_port = proto == IP_PROTOCOL_UDP || proto == IP_PROTOCOL_TCP || + proto == IP_PROTOCOL_ICMP; + + result->ip4_key.as_u64x2 = key->as_u64x2; + result->as_u64 = skey->as_u64; + if (with_port && pseudo_dir) + { + result->ip4_key.ip_addr_lo = key->ip_addr_hi; + result->ip4_key.port_lo = clib_net_to_host_u16 (key->port_hi); + result->ip4_key.ip_addr_hi = key->ip_addr_lo; + result->ip4_key.port_hi = clib_net_to_host_u16 (key->port_lo); + } + else + { + result->ip4_key.ip_addr_lo = key->ip_addr_lo; + result->ip4_key.port_lo = clib_net_to_host_u16 (key->port_lo); + result->ip4_key.ip_addr_hi = key->ip_addr_hi; + result->ip4_key.port_hi = clib_net_to_host_u16 (key->port_hi); + } +} + +void +sfdp_normalise_ip6_key (sfdp_session_t *session, + sfdp_session_ip6_key_t 
*result, u8 key_idx) +{ + sfdp_session_ip6_key_t *skey = &session->keys[key_idx].key6; + sfdp_ip6_key_t *key = &skey->ip6_key; + u8 pseudo_dir = session->pseudo_dir[key_idx]; + u8 proto = session->proto; + u8 with_port = proto == IP_PROTOCOL_UDP || proto == IP_PROTOCOL_TCP || + proto == IP_PROTOCOL_ICMP; + + result->ip6_key.as_u64x4 = key->as_u64x4; + result->as_u64 = skey->as_u64; + if (with_port && pseudo_dir) + { + result->ip6_key.ip6_addr_lo = key->ip6_addr_hi; + result->ip6_key.port_lo = clib_net_to_host_u16 (key->port_hi); + result->ip6_key.ip6_addr_hi = key->ip6_addr_lo; + result->ip6_key.port_hi = clib_net_to_host_u16 (key->port_lo); + } + else + { + result->ip6_key.ip6_addr_lo = key->ip6_addr_lo; + result->ip6_key.port_lo = clib_net_to_host_u16 (key->port_lo); + result->ip6_key.ip6_addr_hi = key->ip6_addr_hi; + result->ip6_key.port_hi = clib_net_to_host_u16 (key->port_hi); + } +} + +int +sfdp_bihash_add_del_inline_with_hash_24_8 (clib_bihash_24_8_t *h, + clib_bihash_kv_24_8_t *kv, u64 hash, + u8 is_add) +{ + return clib_bihash_add_del_inline_with_hash_24_8 (h, kv, hash, is_add, 0, 0, + 0, 0); +} + +int +sfdp_bihash_add_del_inline_with_hash_48_8 (clib_bihash_48_8_t *h, + clib_bihash_kv_48_8_t *kv, u64 hash, + u8 is_add) +{ + return clib_bihash_add_del_inline_with_hash_48_8 (h, kv, hash, is_add, 0, 0, + 0, 0); +} + +static clib_error_t * +sfdp_config (vlib_main_t *vm, unformat_input_t *input) +{ + sfdp_main_t *sfdp = &sfdp_main; + u32 eviction_sessions_margin = ~0; + u8 sessions_cache_specified = 0; + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "sessions-log2 %u", &sfdp->log2_sessions)) + ; + else if (unformat (input, "sessions-per-thread-cache-log2 %u", + &sfdp->log2_sessions_cache_per_thread)) + { + sessions_cache_specified = 1; + } + else if (unformat (input, "tenants-log2 %u", &sfdp->log2_tenants)) + ; + else if (unformat (input, "eviction-sessions-margin %u", + &eviction_sessions_margin)) + ; + else if 
(unformat (input, "no-main")) + { + /* Disable only if there are workers */ + if (vlib_num_workers ()) + sfdp->no_main = 1; + else + clib_warning ("Ignoring no-main option: no workers"); + } + else + { + return clib_error_return (0, "Invalid SFDP plugin config"); + } + } + + if (!sessions_cache_specified) + { + if (sfdp->log2_sessions > SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO + 4) + { + sfdp->log2_sessions_cache_per_thread = + sfdp->log2_sessions - SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO; + } + else + { + /* If the total number of sessions is really small (can happen in + * tests) we don't use session caching by default to protect against + * exhaustion. */ + sfdp->log2_sessions_cache_per_thread = 0; + } + } + + sfdp->eviction_sessions_margin = eviction_sessions_margin; + + return 0; +} + +/* sfdp { [sessions-log2 ] [tenants-log2 ] [eviction-sessions-margin ] + * } config. */ +VLIB_EARLY_CONFIG_FUNCTION (sfdp_config, "sfdp"); + +VLIB_INIT_FUNCTION (sfdp_init); + +VLIB_PLUGIN_REGISTER () = { + .version = SFDP_CORE_PLUGIN_BUILD_VER, + .description = "sfdp Core Plugin", +}; diff --git a/src/vnet/sfdp/sfdp.h b/src/vnet/sfdp/sfdp.h new file mode 100644 index 00000000000..080df28d3d4 --- /dev/null +++ b/src/vnet/sfdp/sfdp.h @@ -0,0 +1,891 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */ + +#ifndef __included_sfdp_h__ +#define __included_sfdp_h__ + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include + +/* Sessions constants */ +#define SFDP_DEFAULT_LOG2_SESSIONS 19 /* 500k sessions */ +#define SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO \ + 7 /* 1/128 cached sessions per thread */ +#define SFDP_LOG2_MEM_PER_SESSION 12 /* 4kB per session */ + +/* Tenants constants */ +#define SFDP_DEFAULT_LOG2_TENANTS 15 /* 32k tenants */ +#define SFDP_LOG2_MEM_PER_TENANT 6 /* 64B per tenant */ + +#define SFDP_SESSION_ID_TOTAL_BITS 64 +#define SFDP_SESSION_ID_EPOCH_N_BITS 16 + +#define SFDP_BITMAP_SIZE 64 +#define SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE(scope) (scope + SFDP_BITMAP_SIZE) + +/* Convention session_index is 31 bit + * Flow_index (embedded in vlib_buffer_t as "flow_id") + * Flow_index = (session_index << 1) + !(is_forward) + + * A flow is "forward" if it's going from initiator to responder + * The packet_direction is 1 if normalisation happened 0 otherwise + * the stored_direction of a flow is the packet direction of its FSOL + * Pseudo_flow_index = (session_index << 1) + stored_direction + * + * Note that for a packet belonging to a flow + * ---------------------------------------------------------- + * !(is_forward) = packet_direction ^ stored_direction + * Flow_index = Pseudo_flow_index ^ stored_direction + * ---------------------------------------------------------- + */ + +typedef enum +{ + SFDP_SESSION_TYPE_IP4, + SFDP_SESSION_TYPE_IP6, + SFDP_SESSION_TYPE_USER, + /* last */ + SFDP_SESSION_N_TYPES, +} sfdp_session_type_t; + +#define foreach_sfdp_session_state \ + _ (FSOL, "embryonic") \ + _ (ESTABLISHED, "established") \ + _ (TIME_WAIT, "time-wait") \ + /* Free session does not belong to main pool anymore, but is unused */ \ + _ (FREE, "free") + +typedef enum +{ +#define _(val, str) SFDP_SESSION_STATE_##val, + foreach_sfdp_session_state +#undef _ + SFDP_SESSION_N_STATE 
+} sfdp_session_state_t; + +#define foreach_sfdp_flow_counter _ (LOOKUP, "lookup") + +typedef enum +{ +#define _(x, y) SFDP_FLOW_COUNTER_##x, + foreach_sfdp_flow_counter +#undef _ + SFDP_FLOW_N_COUNTER +} sfdp_flow_counter_index_t; + +#define foreach_sfdp_tenant_session_counter \ + _ (CREATED, "created", "created sessions") \ + _ (REMOVED, "removed", "removed sessions") + +#define foreach_sfdp_tenant_data_counter \ + _ (INCOMING, "incoming", "incoming data into tenant") \ + _ (OUTGOING, "outgoing", "outgoing data out of tenant") + +typedef enum +{ +#define _(x, y, z) SFDP_TENANT_SESSION_COUNTER_##x, + foreach_sfdp_tenant_session_counter +#undef _ + SFDP_TENANT_SESSION_N_COUNTER +} sfdp_tenant_session_counter_index_t; + +typedef enum +{ +#define _(x, y, z) SFDP_TENANT_DATA_COUNTER_##x, + foreach_sfdp_tenant_data_counter +#undef _ + SFDP_TENANT_DATA_N_COUNTER +} sfdp_tenant_data_counter_index_t; + +enum +{ + SFDP_FLOW_FORWARD = 0, + SFDP_FLOW_REVERSE = 1, + SFDP_FLOW_F_B_N = 2 +}; + +enum +{ + SFDP_SESSION_KEY_PRIMARY, + SFDP_SESSION_KEY_SECONDARY, + SFDP_SESSION_N_KEY +}; +/* Flags to determine key validity in the session */ +#define foreach_sfdp_session_key_flag \ + _ (PRIMARY_VALID_IP4, 0x1, "primary-valid-ip4") \ + _ (PRIMARY_VALID_IP6, 0x2, "primary-valid-ip6") \ + _ (SECONDARY_VALID_IP4, 0x4, "secondary-valid-ip4") \ + _ (SECONDARY_VALID_IP6, 0x8, "secondary-valid-ip6") \ + _ (PRIMARY_VALID_USER, 0x10, "primary-valid-user") \ + _ (SECONDARY_VALID_USER, 0x20, "secondary-valid-user") + +enum +{ +#define _(x, n, s) SFDP_SESSION_KEY_FLAG_##x = n, + foreach_sfdp_session_key_flag +#undef _ +}; + +#define foreach_sfdp_sp_node \ + _ (IP4_REASS, "error-drop", "sp-ip4-reassembly") \ + _ (IP6_REASS, "error-drop", "sp-ip6-reassembly") \ + _ (IP4_UNKNOWN_PROTO, "error-drop", "sp-ip4-unknown-proto") \ + _ (IP6_UNKNOWN_PROTO, "error-drop", "sp-ip6-unknown-proto") \ + _ (IP4_ICMP4_ERROR, "error-drop", "sp-ip4-icmp4-error") \ + _ (IP6_ICMP6_ERROR, "error-drop", 
"sp-ip4-icmp6-error") \ + _ (IP4_TABLE_OVERFLOW, "error-drop", "sp-ip4-table-overflow") \ + _ (IP6_TABLE_OVERFLOW, "error-drop", "sp-ip6-table-overflow") + +enum +{ +#define _(name, val, str) SFDP_SP_NODE_##name, + foreach_sfdp_sp_node +#undef _ + SFDP_N_SP_NODES +}; + +typedef union +{ + struct + { + union + { + u32 spi; + struct + { + u16 port_lo; + u16 port_hi; + }; + }; + u8 unused; + u8 proto; + u16 unused2; + u32 ip_addr_lo; + u32 ip_addr_hi; + }; + u8x16u as_u8x16; + u32x4u as_u32x4; + u64x2u as_u64x2; +} __clib_packed sfdp_ip4_key_t; +STATIC_ASSERT_SIZEOF (sfdp_ip4_key_t, 16); + +typedef union +{ + struct + { + union + { + u32 spi; + struct + { + u16 port_lo; + u16 port_hi; + }; + }; + u16 unused; + u8 proto; + u8 unused2; + ip6_address_t ip6_addr_lo; + ip6_address_t ip6_addr_hi; + }; + struct + { + u32x2u as_u32x2; + u32x8u as_u32x8; + }; + struct + { + u16x4u as_u16x4; + u16x16u as_u16x16; + }; + struct + { + u8x8u as_u8x8; + u8x16u as_u8x16[2]; + }; + struct + { + u64 as_u64; + u64x4u as_u64x4; + }; +} __clib_packed sfdp_ip6_key_t; +STATIC_ASSERT_SIZEOF (sfdp_ip6_key_t, 40); + +typedef struct +{ + sfdp_ip4_key_t ip4_key; + + union + { + struct + { + u32 context_id; + u8 zeros[4]; + }; + u64 as_u64; + }; +} __clib_packed sfdp_session_ip4_key_t; +STATIC_ASSERT_SIZEOF (sfdp_session_ip4_key_t, 24); + +typedef struct +{ + sfdp_ip6_key_t ip6_key; + + union + { + struct + { + u32 context_id; + u8 zeros[4]; + }; + u64 as_u64; + }; +} __clib_packed sfdp_session_ip6_key_t; +STATIC_ASSERT_SIZEOF (sfdp_session_ip6_key_t, 48); + +typedef union +{ + sfdp_session_ip4_key_t key4; + sfdp_session_ip6_key_t key6; +} sfdp_session_ip46_key_t; + +typedef union +{ + sfdp_ip4_key_t key4; + sfdp_ip6_key_t key6; +} sfdp_ip46_key_t; + +typedef union +{ + clib_bihash_kv_24_8_t kv4; + clib_bihash_kv_48_8_t kv6; +} sfdp_bihash_kv46_t; + +#define SFDP_SESSION_IP46_KEYS_TYPE(n) \ + union \ + { \ + sfdp_session_ip4_key_t keys4[(n)]; \ + sfdp_session_ip6_key_t keys6[(n)]; \ + } + 
+/* Value of sfdp_session_t.owning_thread_index when no thread owns the
+ * session. */
+#define SFDP_UNBOUND_THREAD_INDEX ((u16) ~0)
+/* One tracked session. The fields before the cache1 marker hold per-session
+ * state; the lookup keys start at the cache1 marker. The static asserts
+ * below pin the offset of cache1 and the total size. */
+typedef struct sfdp_session
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cache0);
+  sfdp_bitmap_t bitmaps[SFDP_FLOW_F_B_N];
+  u64 session_id;
+  u64 expiry_opaque[2]; /* returned by sfdp_get_session_expiry_opaque () */
+  session_version_t session_version;
+  u8 state; /* see sfdp_session_state_t */
+  u8 proto;
+  u16 tenant_idx;
+  u16 owning_thread_index;
+  u8 unused0[16];
+  u8 pseudo_dir[SFDP_SESSION_N_KEY];
+  u8 type; /* see sfdp_session_type_t */
+  u8 key_flags; /* SFDP_SESSION_KEY_FLAG_* bits */
+  u16 parser_index[SFDP_SESSION_N_KEY];
+  u8 scope_index;
+  u8 unused1[55];
+  CLIB_CACHE_LINE_ALIGN_MARK (cache1);
+  union
+  {
+    sfdp_session_ip46_key_t keys[SFDP_SESSION_N_KEY];
+    u8 keys_data[SFDP_SESSION_N_KEY][64];
+  };
+} sfdp_session_t; /* TODO: optimise mem layout, this is bad */
+/* With 64-byte cache lines the fields before cache1 span two lines;
+ * otherwise they are expected to fit in one. */
+#if CLIB_CACHE_LINE_BYTES == 64
+STATIC_ASSERT ((STRUCT_OFFSET_OF (sfdp_session_t, cache1) -
+                STRUCT_OFFSET_OF (sfdp_session_t, cache0)) ==
+                 2 * CLIB_CACHE_LINE_BYTES,
+               "cache line alignment is broken for sfdp_session_t");
+#else
+STATIC_ASSERT ((STRUCT_OFFSET_OF (sfdp_session_t, cache1) -
+                STRUCT_OFFSET_OF (sfdp_session_t, cache0)) ==
+                 CLIB_CACHE_LINE_BYTES,
+               "cache line alignment is broken for sfdp_session_t");
+#endif
+
+/* The members of the second cacheline are bigger than 64 bytes, thus due to
+ * the alignment constraints, the struct size depends on the cacheline size. */
+#if CLIB_CACHE_LINE_BYTES == 64
+STATIC_ASSERT_SIZEOF (sfdp_session_t, 4 * CLIB_CACHE_LINE_BYTES);
+#else
+STATIC_ASSERT_SIZEOF (sfdp_session_t, 2 * CLIB_CACHE_LINE_BYTES);
+#endif
+
+/* Return a pointer to the session's expiry_opaque storage (two u64). */
+always_inline void *
+sfdp_get_session_expiry_opaque (sfdp_session_t *s)
+{
+  return (void *) s->expiry_opaque;
+}
+
+/* Per-thread session bookkeeping. */
+typedef struct
+{
+  u32 *expired_sessions; // per thread expired session vector
+  u64 session_id_ctr;      /* advanced by 2 for every generated id */
+  u64 session_id_template; /* OR-ed into every generated id */
+  u32 *session_freelist;   /* session indices cached by this thread */
+  u32 n_sessions; /* Number of sessions belonging to this thread */
+} sfdp_per_thread_data_t;
+
+// TODO: Find a way to abstract, or share, timeout definition.
+// They should be either private to timer.h, or sharable between them.
+
+/* Per-tenant timeout type */
+
+typedef struct sfdp_timeout
+{
+  const char *name; // Timeout name used to parse config and display
+  u32 val;          // Timeout value used when creating a new tenant
+} sfdp_timeout_t;
+
+/* The literal 8 mirrors SFDP_MAX_TIMEOUTS, which is defined just below. */
+STATIC_ASSERT_SIZEOF (sfdp_timeout_t[8], 16 * 8);
+
+/* Maximum number of tenant timers configurable */
+#define SFDP_MAX_TIMEOUTS 8
+
+/* Per-tenant configuration. */
+typedef struct
+{
+  u32 tenant_id;
+  u32 context_id;
+  sfdp_bitmap_t bitmaps[SFDP_FLOW_F_B_N]; /* copied into each new session */
+  u32 timeouts[SFDP_MAX_TIMEOUTS];
+  u32 sp_node_indices[SFDP_N_SP_NODES];
+  uword icmp4_lookup_next;
+  uword icmp6_lookup_next;
+
+} sfdp_tenant_t;
+
+/* Global SFDP state. */
+typedef struct
+{
+  /* key = (u64) tenant_id; val= (u64) tenant_idx; */
+  clib_bihash_8_8_t tenant_idx_by_id;
+
+  /* (sfdp_session_ip4_key_t) -> value built by sfdp_session_mk_table_value:
+   * session version (16 MSB), 8 bits of padding, thread index (8 bits),
+   * session_index (31 bits), stored_direction (1 LSB) */
+  clib_bihash_24_8_t table4;
+
+  /* (sfdp_session_ip6_key_t) -> value built by sfdp_session_mk_table_value:
+   * session version (16 MSB), 8 bits of padding, thread index (8 bits),
+   * session_index (31 bits), stored_direction (1 LSB) */
+  clib_bihash_48_8_t table6;
+  /* session_id -> same value format as table4/table6 */
+  clib_bihash_8_8_t session_index_by_id;
+  /* taken around pool_get/pool_put on the sessions pool */
+  clib_spinlock_t session_lock;
+  sfdp_session_t *sessions; /* fixed pool */
+  u32 free_sessions; /* decremented on pool_get, incremented on pool_put */
+  vlib_combined_counter_main_t per_session_ctr[SFDP_FLOW_N_COUNTER];
+  u32 *frame_queue_index_per_scope;
+  uword *handoff_node_index_per_scope;
+  uword *ip4_lookup_node_index_per_scope;
+  uword *ip6_lookup_node_index_per_scope;
+  uword **parser_node_index_per_scope_per_original;
+  u32 icmp4_error_frame_queue_index;
+  u32 icmp6_error_frame_queue_index;
+  u64 session_id_ctr_mask; /* applied to the per-thread session_id_ctr */
+  vlib_simple_counter_main_t tenant_session_ctr[SFDP_TENANT_SESSION_N_COUNTER];
+  vlib_combined_counter_main_t tenant_data_ctr[SFDP_TENANT_DATA_N_COUNTER];
+
+  /* pool of tenants */
+  sfdp_tenant_t *tenants;
+
+  /* per-thread data */
+  sfdp_per_thread_data_t *per_thread_data;
+  u16 msg_id_base;
+  sfdp_expiry_callbacks_t expiry_callbacks;
+
+  /* Timer names and defaults.
+   * Timers with name equal to NULL are not configured. */
+  sfdp_timeout_t timeouts[SFDP_MAX_TIMEOUTS];
+
+  /* log2 of the table sizes; see sfdp_num_sessions () and friends */
+  u32 log2_sessions;
+  u32 log2_sessions_cache_per_thread;
+  u32 log2_tenants;
+
+  /* Per-thread number of sessions margin before eviction.
+   * See the sfdp_set_eviction_sessions_margin function for more
+   * information. */
+  u32 eviction_sessions_margin;
+
+  /* If this is set, don't run polling nodes on main */
+  int no_main;
+} sfdp_main_t;
+
+/* Runtime data of a lookup node: the scope it serves. */
+typedef struct
+{
+  u32 scope_index;
+} sfdp_lookup_node_runtime_data_t;
+
+/* Iterate 'timeout' over all SFDP_MAX_TIMEOUTS slots of (sfdp)->timeouts,
+ * including slots whose name is NULL. */
+#define sfdp_foreach_timeout(sfdp, timeout)                                  \
+  for (timeout = (sfdp)->timeouts;                                           \
+       timeout < (sfdp)->timeouts + SFDP_MAX_TIMEOUTS; timeout++)
+
+/* Iterate over the sessions pool, setting i to the index and s to the
+ * session, skipping sessions in SFDP_SESSION_STATE_FREE. The expansion ends
+ * with an 'if', so the statement that follows is its body. */
+#define sfdp_foreach_session(sfdp, i, s)                                     \
+  pool_foreach_index (i, (sfdp)->sessions)                                   \
+    if ((s = sfdp_session_at_index (i)) && s->state != SFDP_SESSION_STATE_FREE)
+
+extern sfdp_main_t sfdp_main;
+extern vlib_node_registration_t sfdp_handoff_node;
+extern vlib_node_registration_t sfdp_lookup_ip4_icmp_node;
+extern vlib_node_registration_t sfdp_lookup_ip6_icmp_node;
+extern vlib_node_registration_t sfdp_lookup_ip4_node;
+extern vlib_node_registration_t sfdp_lookup_ip6_node;
+/* format/unformat helpers declared here, defined elsewhere */
+format_function_t format_sfdp_session;
+format_function_t format_sfdp_ipv4_context_id;
+format_function_t format_sfdp_ipv4_ingress;
+format_function_t format_sfdp_ipv4_egress;
+format_function_t format_sfdp_ipv6_context_id;
+format_function_t format_sfdp_ipv6_ingress;
+format_function_t format_sfdp_ipv6_egress;
+format_function_t format_sfdp_session_detail;
+format_function_t format_sfdp_session_state;
+format_function_t format_sfdp_session_type;
+format_function_t format_sfdp_tenant;
+format_function_t format_sfdp_tenant_extra;
+format_function_t format_sfdp_sp_node;
+unformat_function_t unformat_sfdp_service;
+unformat_function_t unformat_sfdp_service_bitmap;
+unformat_function_t unformat_sfdp_sp_node;
+unformat_function_t unformat_sfdp_timeout_name;
+
+/* Configured session table size: 2^log2_sessions. */
+static_always_inline u64
+sfdp_num_sessions ()
+{
+  return (1ULL <<
(sfdp_main.log2_sessions));
+}
+
+/* Maximum length of a per-thread session freelist:
+ * 2^log2_sessions_cache_per_thread. */
+static_always_inline u64
+sfdp_num_sessions_cache_per_thread ()
+{
+  return (1ULL << (sfdp_main.log2_sessions_cache_per_thread));
+}
+
+/* Non-zero when the sessions pool holds at least sfdp_num_sessions ()
+ * elements. */
+static_always_inline int
+sfdp_table_is_full ()
+{
+  /* Note: We use >= to be on the safe side... */
+  return pool_elts (sfdp_main.sessions) >= sfdp_num_sessions ();
+}
+
+/* Pool elements minus the entries parked in the per-thread freelists,
+ * which are still allocated in the pool. */
+static_always_inline u64
+sfdp_real_active_sessions ()
+{
+  u64 sessions = pool_elts (sfdp_main.sessions);
+  sfdp_per_thread_data_t *ptd;
+  vec_foreach (ptd, sfdp_main.per_thread_data)
+    {
+      sessions -= vec_len (ptd->session_freelist);
+    }
+  return sessions;
+}
+
+// Number of sessions that can be allocated by threads in the global pool
+static_always_inline u64
+sfdp_remaining_sessions_in_pool ()
+{
+  return sfdp_num_sessions () - pool_elts (sfdp_main.sessions);
+}
+
+// Return the number of sessions that this thread should be able to allocate
+static_always_inline u64
+sfdp_sessions_available_for_this_thread (sfdp_per_thread_data_t *ptd)
+{
+  return sfdp_remaining_sessions_in_pool () + vec_len (ptd->session_freelist);
+}
+
+/* Number of workers, or 1 when vlib_num_workers () returns 0. */
+static_always_inline u64
+sfdp_session_num_thread_factor ()
+{
+  u32 n_workers = vlib_num_workers ();
+  return n_workers ?
n_workers : 1;
+}
+
+/* Sizing helpers for the IPv4/IPv6 session tables and the tenant table,
+ * all derived from the log2_* settings in sfdp_main. They are presumably
+ * passed to the bihash initialisation - confirm at the call sites. */
+static_always_inline u64
+sfdp_ip4_num_buckets ()
+{
+  return (1ULL << (sfdp_main.log2_sessions - 1));
+}
+
+static_always_inline u64
+sfdp_ip4_mem_size ()
+{
+  return (1ULL << (sfdp_main.log2_sessions + SFDP_LOG2_MEM_PER_SESSION));
+}
+
+static_always_inline u64
+sfdp_ip6_num_buckets ()
+{
+  return (1ULL << (sfdp_main.log2_sessions - 1));
+}
+
+static_always_inline u64
+sfdp_ip6_mem_size ()
+{
+  return (1ULL << (sfdp_main.log2_sessions + SFDP_LOG2_MEM_PER_SESSION));
+}
+
+static_always_inline u64
+sfdp_tenant_num_buckets ()
+{
+  return (1ULL << (sfdp_main.log2_tenants - 2));
+}
+
+static_always_inline u64
+sfdp_tenant_mem_size ()
+{
+  return (1ULL << (sfdp_main.log2_tenants + SFDP_LOG2_MEM_PER_TENANT));
+}
+
+/* Per-thread data for thread_index (bounds-checked vector access). */
+static_always_inline sfdp_per_thread_data_t *
+sfdp_get_per_thread_data (u32 thread_index)
+{
+  return vec_elt_at_index (sfdp_main.per_thread_data, thread_index);
+}
+
+/* Decoders for a table lookup value; the layout is described in the
+ * comment that precedes sfdp_session_mk_table_value. */
+
+/* Session index: low 32 bits with the direction bit shifted out. */
+static_always_inline u32
+sfdp_session_index_from_lookup (u64 val)
+{
+  return (val & (~(u32) 0)) >> 1;
+}
+
+/* Thread index: bits 32..39. */
+static_always_inline u8
+sfdp_thread_index_from_lookup (u64 val)
+{
+  return (val >> 32) & 0xFF;
+}
+
+/* Session version: bits 48..63. */
+static_always_inline u16
+sfdp_session_version_from_lookup (u64 val)
+{
+  return (val >> 48);
+}
+
+/* Direction: bit 0. */
+static_always_inline u32
+sfdp_packet_dir_from_lookup (u64 val)
+{
+  return val & 0x1;
+}
+
+/* Pseudo flow index: low 32 bits (session index and direction bit). */
+static_always_inline u32
+sfdp_pseudo_flow_index_from_lookup (u64 val)
+{
+  return val & (~(u32) 0);
+}
+
+/** The format of the lookup value is composed of
+ * 1. 16 bits of session version
+ * (8 bits of padding)
+ * 2. 8 bits of thread index
+ * 3.
32 bits of pseudo flow index
+ **/
+static_always_inline u64
+sfdp_session_mk_table_value (u8 thread_index, u32 pseudo_flow_index,
+                             session_version_t session_version)
+{
+  /* NOTE(review): thread_index is a u8 here, so wider values passed by
+   * callers (u16/u32 thread indices, SFDP_UNBOUND_THREAD_INDEX) are
+   * truncated to their low 8 bits - confirm this is intended. */
+  u64 value = 0;
+  value |= ((u64) session_version) << 48;
+  value |= ((u64) thread_index) << 32;
+  value |= (u64) pseudo_flow_index;
+  return value;
+}
+
+/* Session at pool index idx, through pool_elt_at_index. */
+static_always_inline sfdp_session_t *
+sfdp_session_at_index (u32 idx)
+{
+  return pool_elt_at_index (sfdp_main.sessions, idx);
+}
+
+/* Same as sfdp_session_at_index but with plain pointer arithmetic. */
+static_always_inline sfdp_session_t *
+sfdp_session_at_index_no_check (u32 idx)
+{
+  return sfdp_main.sessions + idx;
+}
+
+/* Non-zero when idx is an allocated pool index and the session is not in
+ * SFDP_SESSION_STATE_FREE. */
+static_always_inline int
+sfdp_session_at_index_is_active (u32 idx)
+{
+  // TODO: We could use SFDP_SESSION_STATE_FREE alone maybe if its value was
+  // zero.
+  sfdp_main_t *sfdp = &sfdp_main;
+  return (!pool_is_free_index (sfdp->sessions, idx)) &&
+         (sfdp->sessions[idx].state != SFDP_SESSION_STATE_FREE);
+}
+
+/* Session at idx when it is active, NULL otherwise. */
+static_always_inline sfdp_session_t *
+sfdp_session_at_index_if_valid (u32 idx)
+{
+  return sfdp_session_at_index_is_active (idx) ?
sfdp_session_at_index (idx) :
+                                                     NULL;
+}
+
+/* Flow index = (session_index << 1) | direction bit, where the bit is 0
+ * for SFDP_FLOW_FORWARD and 1 for any other dir value. */
+static_always_inline u32
+sfdp_mk_flow_index (u32 session_index, u8 dir)
+{
+  return (session_index << 1) | !(dir == SFDP_FLOW_FORWARD);
+}
+
+/* Session index encoded in a flow index. */
+static_always_inline u32
+sfdp_session_from_flow_index (u32 flow_index)
+{
+  return flow_index >> 1;
+}
+
+/* Direction bit encoded in a flow index. */
+static_always_inline u32
+sfdp_direction_from_flow_index (u32 flow_index)
+{
+  return (flow_index & 0x1);
+}
+
+/* Tenant at pool index idx. */
+static_always_inline sfdp_tenant_t *
+sfdp_tenant_at_index (sfdp_main_t *sfdpm, u32 idx)
+{
+  return pool_elt_at_index (sfdpm->tenants, idx);
+}
+
+/* Return 2 when the session has a valid IPv4 or IPv6 secondary key, 1
+ * otherwise.
+ * NOTE(review): SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER is not tested
+ * here - confirm whether user-defined secondary keys should count. */
+static_always_inline u8
+sfdp_session_n_keys (sfdp_session_t *session)
+{
+  if (session->key_flags & (SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4 |
+                            SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6))
+    return 2;
+  else
+    return 1;
+}
+
+/* Report len new session indices to the expiry module callback, then to
+ * the registered callbacks. */
+static_always_inline void
+sfdp_notify_new_sessions (sfdp_main_t *sfdpm, u32 *new_sessions, u32 len)
+{
+  sfdpm->expiry_callbacks.notify_new_sessions (new_sessions, len);
+  SFDP_CALLBACKS_CALL (notify_new_sessions, new_sessions, len);
+}
+
+/* Report len deleted session indices to the registered callbacks. */
+static_always_inline void
+sfdp_notify_deleted_sessions (sfdp_main_t *sfdpm, u32 *deleted_sessions,
+                              u32 len)
+{
+  SFDP_CALLBACKS_CALL (notify_deleted_sessions, deleted_sessions, len);
+}
+
+/* Allocate a session slot and return its pool index, or ~0 when none is
+ * available.
+ * When bound_to_thread is set, the thread's freelist is tried first and
+ * ptd->n_sessions is incremented on success; ptd is not dereferenced
+ * otherwise. The shared pool is accessed under session_lock and only while
+ * free_sessions is non-zero. */
+static_always_inline u32
+sfdp_alloc_session (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                    bool bound_to_thread)
+{
+  u32 res = ~0;
+  /* Always initialised, so no path reads an indeterminate value. */
+  u32 n_local_elem = 0;
+  sfdp_session_t *session;
+
+  if (bound_to_thread)
+    n_local_elem = vec_len (ptd->session_freelist);
+
+  if (bound_to_thread && n_local_elem)
+    res = vec_pop (ptd->session_freelist);
+  else
+    {
+      clib_spinlock_lock_if_init (&sfdp->session_lock);
+      if (sfdp->free_sessions)
+        {
+          pool_get (sfdp->sessions, session);
+          sfdp->free_sessions -= 1;
+          clib_spinlock_unlock_if_init (&sfdp->session_lock);
+          res = session - sfdp->sessions;
+        }
+      else
+        clib_spinlock_unlock_if_init (&sfdp->session_lock);
+    }
+  if (bound_to_thread && res != ~0)
+    ptd->n_sessions += 1;
+  return res;
+}
+
+static_always_inline void
+sfdp_free_session
(sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                   u32 session_index)
+{
+  /* Release a session slot. With a non-NULL ptd whose freelist is shorter
+   * than sfdp_num_sessions_cache_per_thread (), the index is cached on that
+   * freelist; otherwise it goes back to the shared pool under
+   * session_lock. ptd->n_sessions is decremented whenever ptd is given. */
+  if (ptd &&
+      vec_len (ptd->session_freelist) < sfdp_num_sessions_cache_per_thread ())
+    vec_add1 (ptd->session_freelist, session_index);
+  else
+    {
+      clib_spinlock_lock_if_init (&sfdp->session_lock);
+      pool_put_index (sfdp->sessions, session_index);
+      sfdp->free_sessions += 1;
+      clib_spinlock_unlock_if_init (&sfdp->session_lock);
+    }
+  if (ptd)
+    ptd->n_sessions -= 1;
+}
+
+/* Build a session id from the per-thread counter (masked with
+ * session_id_ctr_mask) OR-ed with the per-thread template, store it in the
+ * session, and add the id -> lookup value mapping to session_index_by_id.
+ * The lookup value uses the session's owning thread, its forward pseudo
+ * flow index (session index << 1) and its current version. */
+static_always_inline void
+sfdp_session_generate_and_set_id (sfdp_main_t *sfdp,
+                                  sfdp_per_thread_data_t *ptd,
+                                  sfdp_session_t *session)
+{
+  clib_bihash_kv_8_8_t kv2;
+  u64 value;
+  u32 session_idx = session - sfdp->sessions;
+  u32 pseudo_flow_idx = (session_idx << 1);
+  u32 thread_index = session->owning_thread_index;
+  u64 session_id = (ptd->session_id_ctr & (sfdp->session_id_ctr_mask)) |
+                   ptd->session_id_template;
+  ptd->session_id_ctr +=
+    2; /* two at a time, because last bit is reserved for direction */
+  session->session_id = session_id;
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_idx,
+                                       session->session_version);
+  kv2.key = session_id;
+  kv2.value = value;
+  clib_bihash_add_del_8_8 (&sfdp->session_index_by_id, &kv2, 1);
+}
+
+/* Internal function to create a new session.
+ * sfdp_notify_new_sessions must be called afterward.
If thread_index is ~0,
+ * the session is created with no assigned thread
+ * Return value: 0 --> SUCCESS
+                 1 --> Unable to allocate session
+                 2 --> Collision */
+static_always_inline int
+sfdp_create_session_inline (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                            sfdp_tenant_t *tenant, u16 tenant_idx,
+                            u16 thread_index, f64 time_now, void *k, u64 *h,
+                            u64 *lookup_val, u32 scope_index, int is_ipv6)
+{
+  /* time_now and h are not read by this function. */
+  sfdp_bihash_kv46_t kv = {};
+  u64 value;
+  u8 proto;
+  sfdp_session_t *session;
+  u32 session_idx;
+  u32 pseudo_flow_idx;
+
+  session_idx =
+    sfdp_alloc_session (sfdp, ptd, thread_index != SFDP_UNBOUND_THREAD_INDEX);
+
+  if (session_idx == ~0)
+    return 1;
+
+  session = pool_elt_at_index (sfdp->sessions, session_idx);
+
+  /* The low bit of lookup_val[0] is the direction stored with the key. The
+   * table value carries version + 1 because session_version is only
+   * incremented once the key has been inserted. */
+  pseudo_flow_idx = (lookup_val[0] & 0x1) | (session_idx << 1);
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_idx,
+                                       session->session_version + 1);
+  /* is_add == 2 is presumably "add without overwriting"; a non-zero return
+   * is treated as a collision - confirm against the bihash template. */
+  if (is_ipv6)
+    {
+      clib_memcpy_fast (&kv.kv6.key, k, sizeof (kv.kv6.key));
+      kv.kv6.value = value;
+      proto = ((sfdp_session_ip6_key_t *) k)->ip6_key.proto;
+      if (clib_bihash_add_del_48_8 (&sfdp->table6, &kv.kv6, 2))
+        {
+          /* collision - remote thread created same entry */
+          sfdp_free_session (sfdp, ptd, session_idx);
+          return 2;
+        }
+      session->type = SFDP_SESSION_TYPE_IP6;
+      session->key_flags = SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6;
+    }
+  else
+    {
+      clib_memcpy_fast (&kv.kv4.key, k, sizeof (kv.kv4.key));
+      kv.kv4.value = value;
+      proto = ((sfdp_session_ip4_key_t *) k)->ip4_key.proto;
+      if (clib_bihash_add_del_24_8 (&sfdp->table4, &kv.kv4, 2))
+        {
+          /* collision - remote thread created same entry */
+          sfdp_free_session (sfdp, ptd, session_idx);
+          return 2;
+        }
+      session->type = SFDP_SESSION_TYPE_IP4;
+      session->key_flags = SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4;
+    }
+  // TODO: Would be nice to do this upon free instead to have avoid having to
+  // check
+  // if the session is valid at all when checking invalidation.
+  session->session_version += 1;
+  session->tenant_idx = tenant_idx;
+  session->state = SFDP_SESSION_STATE_FSOL;
+  session->owning_thread_index = thread_index;
+  session->scope_index = scope_index;
+  /* The session id is only generated when per-thread data is supplied. */
+  if (ptd)
+    sfdp_session_generate_and_set_id (sfdp, ptd, session);
+
+  /* A new session starts with the tenant's bitmaps. */
+  clib_memcpy_fast (session->bitmaps, tenant->bitmaps,
+                    sizeof (session->bitmaps));
+  if (is_ipv6)
+    clib_memcpy_fast (&session->keys[SFDP_SESSION_KEY_PRIMARY].key6, k,
+                      sizeof (session->keys[0].key6));
+  else
+    clib_memcpy_fast (&session->keys[SFDP_SESSION_KEY_PRIMARY].key4, k,
+                      sizeof (session->keys[0].key4));
+  session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY] = lookup_val[0] & 0x1;
+  session->proto = proto;
+
+  /* XOR the caller's value with the stored table value and hand the result
+   * back through lookup_val. */
+  lookup_val[0] ^= value;
+  /* Bidirectional counter zeroing */
+  vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+                              lookup_val[0]);
+  vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+                              lookup_val[0] | 0x1);
+  /* NOTE(review): thread_index may be SFDP_UNBOUND_THREAD_INDEX here -
+   * confirm that it is a valid per-thread index for this counter. */
+  vlib_increment_simple_counter (
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_CREATED],
+    thread_index, tenant_idx, 1);
+  return 0;
+}
+/* The functions below are declared here and defined elsewhere. */
+int sfdp_create_session (vlib_main_t *vm, vlib_buffer_t *b, u32 context_id,
+                         u32 thread_index, u32 tenant_index,
+                         u32 *session_index, int is_ipv6);
+int sfdp_create_session_with_scope_index (vlib_main_t *vm, vlib_buffer_t *b,
+                                          u32 context_id, u32 thread_index,
+                                          u32 tenant_index, u32 *session_index,
+                                          u32 scope_index, int is_ipv6);
+
+clib_error_t *sfdp_tenant_add_del (sfdp_main_t *sfdp, u32 tenant_id,
+                                   u32 context_id, u8 is_del);
+clib_error_t *sfdp_set_services (sfdp_main_t *sfdp, u32 tenant_id,
+                                 sfdp_bitmap_t bitmap, u8 direction);
+clib_error_t *sfdp_set_timeout (sfdp_main_t *sfdp, u32 tenant_id,
+                                u32 timeout_idx, u32 timeout_val);
+
+clib_error_t *sfdp_set_sp_node (sfdp_main_t *sfdp, u32 tenant_id, u32 sp_index,
+                                u32 node_index);
+clib_error_t *sfdp_set_icmp_error_node (sfdp_main_t *sfdp, u32 tenant_id,
+                                        u8 is_ip6, u32 node_index);
+void sfdp_normalise_ip4_key (sfdp_session_t
*session,
+                             sfdp_session_ip4_key_t *result, u8 key_idx);
+
+void sfdp_normalise_ip6_key (sfdp_session_t *session,
+                             sfdp_session_ip6_key_t *result, u8 key_idx);
+
+/* Session table formatting helpers (declarations only). */
+void sfdp_table_format_add_header_col (table_t *t);
+u32 sfdp_table_format_insert_session (table_t *t, u32 n, u32 session_index,
+                                      sfdp_session_t *session, u32 tenant_id,
+                                      f64 now);
+/* bihash add/del variants taking a precomputed hash (declarations only). */
+int sfdp_bihash_add_del_inline_with_hash_24_8 (clib_bihash_24_8_t *h,
+                                               clib_bihash_kv_24_8_t *kv,
+                                               u64 hash, u8 is_add);
+
+int sfdp_bihash_add_del_inline_with_hash_48_8 (clib_bihash_48_8_t *h,
+                                               clib_bihash_kv_48_8_t *kv,
+                                               u64 hash, u8 is_add);
+
+/* Full-reassembly next-node registration (declarations only). */
+void sfdp_ip4_full_reass_custom_context_register_next_node (u16 node_index);
+void sfdp_ip6_full_reass_custom_context_register_next_node (u16 node_index);
+void
+sfdp_ip4_full_reass_custom_context_register_next_err_node (u16 node_index);
+void
+sfdp_ip6_full_reass_custom_context_register_next_err_node (u16 node_index);
+
+#define SFDP_CORE_PLUGIN_BUILD_VER "1.0"
+
+#endif /* __included_sfdp_h__ */
diff --git a/src/vnet/sfdp/sfdp_funcs.h b/src/vnet/sfdp/sfdp_funcs.h
new file mode 100644
index 00000000000..b3e952525bb
--- /dev/null
+++ b/src/vnet/sfdp/sfdp_funcs.h
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_funcs_h__
+#define __included_sfdp_funcs_h__
+/* NOTE(review): the targets of the two includes below are missing in this
+ * copy of the patch. */
+#include
+#include
+
+/* Remove a session: delete every key flagged valid in key_flags from its
+ * table (table4, table6 or the parser's own table), delete the session id
+ * mapping, bump the tenant's REMOVED counter, mark the session FREE and
+ * unbound, and release the slot through sfdp_free_session. */
+static_always_inline void
+sfdp_session_remove (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                     sfdp_session_t *session, u32 thread_index,
+                     u32 session_index)
+{
+  clib_bihash_kv_8_8_t kv2 = { 0 };
+  sfdp_bihash_kv46_t kv = { 0 };
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kvdata[SFDP_PARSER_MAX_KEY_SIZE + 8];
+  uword parser_key_size;
+  void *parser_table;
+  sfdp_parser_data_t *parser;
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+
+  kv2.key = session->session_id;
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    {
+      clib_memcpy_fast (&kv.kv4.key,
+                        &session->keys[SFDP_SESSION_KEY_PRIMARY].key4,
+                        sizeof (kv.kv4.key));
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv.kv4, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4)
+    {
+      clib_memcpy_fast (&kv.kv4.key,
+                        &session->keys[SFDP_SESSION_KEY_SECONDARY].key4,
+                        sizeof (kv.kv4.key));
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv.kv4, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    {
+      clib_memcpy_fast (&kv.kv6.key,
+                        &session->keys[SFDP_SESSION_KEY_PRIMARY].key6,
+                        sizeof (kv.kv6.key));
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv.kv6, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6)
+    {
+      clib_memcpy_fast (&kv.kv6.key,
+                        &session->keys[SFDP_SESSION_KEY_SECONDARY].key6,
+                        sizeof (kv.kv6.key));
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv.kv6, 0);
+    }
+  /* User-defined keys live in the owning parser's table and use the
+   * parser's key size. */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+        pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata, &session->keys_data[SFDP_SESSION_KEY_PRIMARY],
+                        parser_key_size);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                  parser_table, kvdata, 0);
+    }
+  if
(session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+        pm->parsers, session->parser_index[SFDP_SESSION_KEY_SECONDARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata,
+                        &session->keys_data[SFDP_SESSION_KEY_SECONDARY],
+                        parser_key_size);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                  parser_table, kvdata, 0);
+    }
+  /* Drop the session id mapping, account for the removal, then mark the
+   * session free and unbound before releasing its slot. */
+  clib_bihash_add_del_8_8 (&sfdp->session_index_by_id, &kv2, 0);
+  vlib_increment_simple_counter (
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_REMOVED],
+    thread_index, session->tenant_idx, 1);
+  session->state = SFDP_SESSION_STATE_FREE;
+  session->owning_thread_index = SFDP_UNBOUND_THREAD_INDEX;
+  sfdp_free_session (sfdp, ptd, session_index);
+}
+
+/* Try to register 'key' as the secondary IPv4/IPv6 key of the session
+ * encoded in pseudo_flow_index. The computed hash is returned through h.
+ * On success (return 0) the key, its direction bit and the matching
+ * SECONDARY_VALID flag are stored in the session; otherwise the non-zero
+ * result of the table insertion is returned and the session is left
+ * untouched. */
+static_always_inline int
+sfdp_session_try_add_secondary_key (sfdp_main_t *sfdp, u32 thread_index,
+                                    u32 pseudo_flow_index,
+                                    sfdp_session_ip46_key_t *key,
+                                    ip46_type_t type, u64 *h)
+{
+  int rv;
+  sfdp_bihash_kv46_t kv;
+  u64 value;
+  sfdp_session_t *session;
+  u32 session_index;
+
+  session_index = sfdp_session_from_flow_index (pseudo_flow_index);
+  session = sfdp_session_at_index (session_index);
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_index,
+                                       session->session_version);
+
+  if (type == IP46_TYPE_IP4)
+    {
+      /* 24-byte key: 16 bytes of flow key plus the context id word. */
+      kv.kv4.key[0] = key->key4.ip4_key.as_u64x2[0];
+      kv.kv4.key[1] = key->key4.ip4_key.as_u64x2[1];
+      kv.kv4.key[2] = key->key4.as_u64;
+      kv.kv4.value = value;
+      *h = clib_bihash_hash_24_8 (&kv.kv4);
+      if ((rv = sfdp_bihash_add_del_inline_with_hash_24_8 (
+             &sfdp->table4, &kv.kv4, *h, 2)) == 0)
+        {
+          session->keys[SFDP_SESSION_KEY_SECONDARY] = *key;
+          session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY] =
+            pseudo_flow_index & 0x1;
+          session->key_flags |= SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4;
+        }
+    }
+  else
+    {
+      /* 48-byte key: 40 bytes of flow key plus the context id word. */
+      kv.kv6.key[0] = key->key6.ip6_key.as_u64;
+      kv.kv6.key[1] = key->key6.ip6_key.as_u64x4[0];
+      kv.kv6.key[2] =
key->key6.ip6_key.as_u64x4[1];
+      kv.kv6.key[3] = key->key6.ip6_key.as_u64x4[2];
+      kv.kv6.key[4] = key->key6.ip6_key.as_u64x4[3];
+      kv.kv6.key[5] = key->key6.as_u64;
+      kv.kv6.value = value;
+      *h = clib_bihash_hash_48_8 (&kv.kv6);
+      if ((rv = sfdp_bihash_add_del_inline_with_hash_48_8 (
+             &sfdp->table6, &kv.kv6, *h, 2)) == 0)
+        {
+          session->keys[SFDP_SESSION_KEY_SECONDARY] = *key;
+          session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY] =
+            pseudo_flow_index & 0x1;
+          session->key_flags |= SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6;
+        }
+    }
+
+  return rv;
+}
+
+/* Same as sfdp_session_try_add_secondary_key, for a user-defined (parser)
+ * key of key_size bytes stored in 'table'. The key/value buffer is the key
+ * followed by the 8-byte lookup value. On success (return 0) the key bytes,
+ * direction bit, SECONDARY_VALID_USER flag and parser index are stored in
+ * the session. */
+static_always_inline int
+sfdp_parser_session_try_add_secondary_key_with_details (
+  void *table, uword key_size, uword parser_index, u32 thread_index,
+  u32 pseudo_flow_index, void *key, u64 *h)
+{
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kvdata[SFDP_PARSER_MAX_KEY_SIZE + 8];
+  int rv;
+  u64 value;
+  sfdp_session_t *session;
+  u32 session_index;
+  /* Minimal stand-in passed to SFDP_PARSER_BIHASH_CALL_FN, which presumably
+   * only reads key_size from it - confirm against the macro. */
+  const struct
+  {
+    uword key_size;
+  } p = { .key_size = key_size };
+
+  session_index = sfdp_session_from_flow_index (pseudo_flow_index);
+  session = sfdp_session_at_index (session_index);
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_index,
+                                       session->session_version);
+
+  clib_memcpy_fast (kvdata, key, key_size);
+  clib_memcpy_fast (kvdata + key_size, &value, sizeof (value));
+  *h = SFDP_PARSER_BIHASH_CALL_FN (&p, sfdp_parser_bihash_hash_fn, kvdata);
+  if ((rv = SFDP_PARSER_BIHASH_CALL_FN (&p, sfdp_parser_bihash_add_del_fn,
+                                        table, kvdata, 2)) == 0)
+    {
+      clib_memcpy_fast (session->keys_data[SFDP_SESSION_KEY_SECONDARY], kvdata,
+                        key_size);
+      session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY] =
+        pseudo_flow_index & 0x1;
+      session->key_flags |= SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER;
+      session->parser_index[SFDP_SESSION_KEY_SECONDARY] = parser_index;
+    }
+  return rv;
+}
+
+/* Re-order an IPv4 key in place so that ip_addr_hi is not smaller than
+ * ip_addr_lo when compared in host byte order, swapping the ports along
+ * with the addresses. Returns old_pseudo with its low bit toggled when a
+ * swap happened, unchanged otherwise. */
+static_always_inline u8
+sfdp_renormalise_ip4_key (sfdp_session_ip4_key_t *key, u32 old_pseudo)
+{
+  if (clib_net_to_host_u32 (key->ip4_key.ip_addr_hi) <
+      clib_net_to_host_u32
(key->ip4_key.ip_addr_lo))
+    {
+      u32 tmp_ip4;
+      u16 tmp_port;
+      tmp_ip4 = key->ip4_key.ip_addr_hi;
+      tmp_port = key->ip4_key.port_hi;
+      key->ip4_key.ip_addr_hi = key->ip4_key.ip_addr_lo;
+      key->ip4_key.port_hi = key->ip4_key.port_lo;
+      key->ip4_key.ip_addr_lo = tmp_ip4;
+      key->ip4_key.port_lo = tmp_port;
+      old_pseudo ^= 0x1;
+    }
+  return old_pseudo;
+}
+
+/* Re-add every valid key of the session to its table (is_add == 1) with a
+ * lookup value that carries thread_index, the key's stored direction bit
+ * and the session's current version. */
+static_always_inline void
+sfdp_session_bind_keys_to_thread (sfdp_session_t *session, u32 session_index,
+                                  u16 thread_index)
+{
+  clib_bihash_kv_24_8_t kv4;
+  clib_bihash_kv_48_8_t kv6;
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kvdata[SFDP_PARSER_MAX_KEY_SIZE + 8];
+  uword parser_key_size;
+  void *parser_table;
+  sfdp_parser_data_t *parser;
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+  /* forward pseudo flow index; the per-key direction bit is OR-ed in below */
+  u32 fi = session_index << 1;
+
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    {
+      clib_memcpy_fast (kv4.key, &session->keys[SFDP_SESSION_KEY_PRIMARY].key4,
+                        sizeof (kv4.key));
+      kv4.value = sfdp_session_mk_table_value (
+        thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY],
+        session->session_version);
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv4, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    {
+      clib_memcpy_fast (kv6.key, &session->keys[SFDP_SESSION_KEY_PRIMARY].key6,
+                        sizeof (kv6.key));
+      kv6.value = sfdp_session_mk_table_value (
+        thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY],
+        session->session_version);
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv6, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+        pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata, &session->keys_data[SFDP_SESSION_KEY_PRIMARY],
+                        parser_key_size);
+      /* the 8-byte value is stored right after the key bytes */
+      ((u64u *) (kvdata + parser_key_size))[0] = sfdp_session_mk_table_value (
+        thread_index, fi |
session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY],
+        session->session_version);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                  parser_table, kvdata, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4)
+    {
+      clib_memcpy_fast (kv4.key,
+                        &session->keys[SFDP_SESSION_KEY_SECONDARY].key4,
+                        sizeof (kv4.key));
+      kv4.value = sfdp_session_mk_table_value (
+        thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY],
+        session->session_version);
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv4, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6)
+    {
+      clib_memcpy_fast (kv6.key,
+                        &session->keys[SFDP_SESSION_KEY_SECONDARY].key6,
+                        sizeof (kv6.key));
+      kv6.value = sfdp_session_mk_table_value (
+        thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY],
+        session->session_version);
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv6, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+        pm->parsers, session->parser_index[SFDP_SESSION_KEY_SECONDARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata,
+                        &session->keys_data[SFDP_SESSION_KEY_SECONDARY],
+                        parser_key_size);
+      /* the 8-byte value is stored right after the key bytes */
+      ((u64u *) (kvdata + parser_key_size))[0] = sfdp_session_mk_table_value (
+        thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY],
+        session->session_version);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                  parser_table, kvdata, 1);
+    }
+}
+
+/* Claim an unbound session for *thread_index by atomically replacing
+ * SFDP_UNBOUND_THREAD_INDEX in owning_thread_index.
+ * Intended contract, per the inline comments in the body: return 0 once the
+ * session is bound and its keys are re-published for the new thread;
+ * return -1 and write the current owner to *thread_index when the session
+ * was already bound. With new_session set, the new-session callbacks are
+ * run and a session id is generated. */
+static_always_inline int
+sfdp_session_bind_to_thread (u32 session_index, u16 *thread_index,
+                             u8 new_session)
+{
+  sfdp_session_t *session = sfdp_session_at_index (session_index);
+  u16 expected = SFDP_UNBOUND_THREAD_INDEX;
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_per_thread_data_t *ptd =
+    vec_elt_at_index (sfdp->per_thread_data, *thread_index);
+
+  if (clib_atomic_cmp_and_swap_acq_relax_n (&session->owning_thread_index,
+                                            &expected,
*thread_index, 0) != 0) + { + *thread_index = expected; /* Return the actual thread index */ + return -1; /* The session was already bound to another thread */ + } + + ASSERT (*thread_index == vlib_get_thread_index ()); + + sfdp_session_bind_keys_to_thread (session, session_index, *thread_index); + if (new_session) + { + sfdp_notify_new_sessions (sfdp, &session_index, 1); + sfdp_session_generate_and_set_id (sfdp, ptd, session); + } + return 0; +} +#endif diff --git a/src/vnet/sfdp/sfdp_types.api b/src/vnet/sfdp/sfdp_types.api new file mode 100644 index 00000000000..f884acb6170 --- /dev/null +++ b/src/vnet/sfdp/sfdp_types.api @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. + */ + +option version = "0.0.1"; +import "vnet/ip/ip_types.api"; + +enum sfdp_session_direction : u8 +{ + SFDP_API_FORWARD = 0, + SFDP_API_REVERSE = 1, +}; + +enum sfdp_session_state : u8 +{ + SFDP_API_SESSION_STATE_FSOL = 0, + SFDP_API_SESSION_STATE_ESTABLISHED = 1, + SFDP_API_SESSION_STATE_TIME_WAIT = 2, +}; + +enum sfdp_sp_node : u8 +{ + SFDP_API_SP_NODE_IP4_REASS = 0, + SFDP_API_SP_NODE_IP6_REASS = 1, + SFDP_API_SP_NODE_IP4_UNKNOWN_PROTO = 2, + SFDP_API_SP_NODE_IP6_UNKNOWN_PROTO = 3, + SFDP_API_SP_NODE_IP4_ICMP4_ERROR = 4, + SFDP_API_SP_NODE_IP6_ICMP6_ERROR = 5, +}; + +enum sfdp_session_type : u8 +{ + SFDP_API_SESSION_TYPE_IP4 = 0, +}; + +typedef sfdp_service_name +{ + string data[32]; +}; + +typedef sfdp_session_key +{ + u32 context_id; + vl_api_address_t init_addr; + u16 init_port; + vl_api_address_t resp_addr; + u16 resp_port; +}; \ No newline at end of file diff --git a/src/vnet/sfdp/sfdp_types_funcs.h b/src/vnet/sfdp/sfdp_types_funcs.h new file mode 100644 index 00000000000..956020abaa8 --- /dev/null +++ b/src/vnet/sfdp/sfdp_types_funcs.h @@ -0,0 +1,90 @@ +/* SPDX-License-Identifier: Apache-2.0 + * Copyright (c) 2025 Cisco Systems, Inc. 
+ */
+
+#ifndef __included_sfdp_types_funcs_h__
+#define __included_sfdp_types_funcs_h__
+
+#include
+#include
+#include
+#include
+/* Map an API direction to the data-plane flow direction (REVERSE maps to
+ * SFDP_FLOW_REVERSE). Any other value falls back to SFDP_FLOW_FORWARD. */
+static_always_inline u8
+sfdp_api_direction (vl_api_sfdp_session_direction_t dir)
+{
+  switch (dir)
+    {
+    case SFDP_API_FORWARD:
+      return SFDP_FLOW_FORWARD;
+    case SFDP_API_REVERSE:
+      return SFDP_FLOW_REVERSE;
+    }
+  return SFDP_FLOW_FORWARD;
+}
+
+/* Encode a session type for the API; only IP4 is mapped, others yield -1. */
+static_always_inline vl_api_sfdp_session_type_t
+sfdp_session_type_encode (sfdp_session_type_t x)
+{
+  switch (x)
+    {
+    case SFDP_SESSION_TYPE_IP4:
+      return SFDP_API_SESSION_TYPE_IP4;
+    default:
+      return -1;
+    }
+}
+
+/* Map an API sp_node value to SFDP_SP_NODE_*; unlisted values return 0. */
+static_always_inline u8
+sfdp_api_sp_node (vl_api_sfdp_sp_node_t sp_node)
+{
+  switch (sp_node)
+    {
+    case SFDP_API_SP_NODE_IP4_REASS:
+      return SFDP_SP_NODE_IP4_REASS;
+    case SFDP_API_SP_NODE_IP6_REASS:
+      return SFDP_SP_NODE_IP6_REASS;
+    case SFDP_API_SP_NODE_IP4_UNKNOWN_PROTO:
+      return SFDP_SP_NODE_IP4_UNKNOWN_PROTO;
+    case SFDP_API_SP_NODE_IP6_UNKNOWN_PROTO:
+      return SFDP_SP_NODE_IP6_UNKNOWN_PROTO;
+    case SFDP_API_SP_NODE_IP4_ICMP4_ERROR:
+      return SFDP_SP_NODE_IP4_ICMP4_ERROR;
+    case SFDP_API_SP_NODE_IP6_ICMP6_ERROR:
+      return SFDP_SP_NODE_IP6_ICMP6_ERROR;
+    default:
+      return 0;
+    }
+}
+
+/* Fill an API session key from the ip4 or ip6 key selected by type: lo maps
+ * to init_*, hi to resp_*; context_id and ports go to network byte order. */
+static_always_inline void
+sfdp_session_ip46_key_encode (sfdp_session_ip46_key_t *skey, ip46_type_t type,
+                              vl_api_sfdp_session_key_t *out)
+{
+  ip46_address_t ip_addr_lo, ip_addr_hi;
+  if (type == IP46_TYPE_IP4)
+    {
+      out->context_id = clib_host_to_net_u32 (skey->key4.context_id);
+      ip_addr_lo.ip4.as_u32 = skey->key4.ip4_key.ip_addr_lo;
+      ip_addr_hi.ip4.as_u32 = skey->key4.ip4_key.ip_addr_hi;
+      out->init_port = clib_host_to_net_u16 (skey->key4.ip4_key.port_lo);
+      out->resp_port = clib_host_to_net_u16 (skey->key4.ip4_key.port_hi);
+    }
+  else
+    {
+      out->context_id = clib_host_to_net_u32 (skey->key6.context_id);
+      ip_addr_lo.ip6 = skey->key6.ip6_key.ip6_addr_lo;
+      ip_addr_hi.ip6 = skey->key6.ip6_key.ip6_addr_hi;
+      out->init_port = clib_host_to_net_u16 (skey->key6.ip6_key.port_lo);
+      out->resp_port = clib_host_to_net_u16 (skey->key6.ip6_key.port_hi);
+    }
+  ip_address_encode (&ip_addr_lo, type, &out->init_addr);
+  ip_address_encode (&ip_addr_hi, type, &out->resp_addr);
+}
+
+#endif /*__included_sfdp_types_funcs_h__*/
\ No newline at end of file
diff --git a/src/vnet/sfdp/timer/timer.c b/src/vnet/sfdp/timer/timer.c
new file mode 100644
index 00000000000..649dbe8f3ec
--- /dev/null
+++ b/src/vnet/sfdp/timer/timer.c
@@ -0,0 +1,187 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include
+
+#include
+
+#include
+#include
+
+sfdp_timer_main_t sfdp_timer_main;
+
+/* Timer wheel expiry callback: append the session index carried by every
+ * expired handle to the calling thread's expired_sessions vector. */
+static void
+expired_timer_callback (u32 *expired)
+{
+  u32 *e;
+  uword thread_index = vlib_get_thread_index ();
+  sfdp_timer_main_t *t = &sfdp_timer_main;
+  sfdp_timer_per_thread_data_t *ptd =
+    vec_elt_at_index (t->per_thread_data, thread_index);
+  vec_foreach (e, expired)
+    {
+      u32 session_idx = e[0] & SFDP_TIMER_SI_MASK;
+      vec_add1 (ptd->expired_sessions, session_idx);
+    }
+}
+
+/* Expiry "enable" callback: size per_thread_data to the number of vlib mains
+ * and initialise one timer wheel per entry. */
+static void
+timer_expiry_cb_enable (void)
+{
+  sfdp_timer_main_t *t = &sfdp_timer_main;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  vec_validate (t->per_thread_data, tm->n_vlib_mains - 1);
+  sfdp_timer_per_thread_data_t *ptd;
+  vec_foreach (ptd, t->per_thread_data)
+    {
+      ptd->expired_sessions = 0;
+      sfdp_tw_init (&ptd->wheel, expired_timer_callback, SFDP_TIMER_INTERVAL,
+                    ~0);
+    }
+}
+
+/* Expiry "disable" callback: currently a no-op. */
+static void
+timer_expiry_cb_disable (void)
+{
+  // Cleanup timer wheel ? Disabling not supported for now.
+}
+
+/* Expiry "expire_or_evict_sessions" callback.
+ * Advances the calling thread's wheel to the current time, then walks the
+ * session indices the wheel reported. A session whose remaining tick count
+ * (see diff below) exceeds 1 is re-armed with that count, converted to u32;
+ * every other session is appended to expired_sessions_vec. The possibly
+ * reallocated vector is returned. desired_expiries is ignored. */
+static u32 *
+timer_expiry_cb_expire_or_evict_sessions (u32 desired_expiries,
+                                          u32 *expired_sessions_vec)
+{
+  (void) desired_expiries; // TODO: Early discards not supported for now.
+
+  sfdp_timer_main_t *t = &sfdp_timer_main;
+  vlib_main_t *vm = vlib_get_main ();
+  u32 tidx = vlib_get_thread_index ();
+  sfdp_timer_per_thread_data_t *ptd =
+    vec_elt_at_index (t->per_thread_data, tidx);
+  u32 session_index;
+
+  f64 now = vlib_time_now (vm);
+  ptd->current_time = now;
+
+  sfdp_expire_timers (&ptd->wheel, now);
+
+  sfdp_session_index_iterate_expired (ptd, session_index)
+    {
+      sfdp_session_t *session = sfdp_session_at_index (session_index);
+      sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+      /* Remaining time in intervals, measured from one interval past now. */
+      f64 diff =
+        (timer->next_expiration - (ptd->current_time + SFDP_TIMER_INTERVAL)) /
+        SFDP_TIMER_INTERVAL;
+      if (diff > (f64) 1.)
+        {
+          /* Rearm the timer accordingly */
+          sfdp_session_timer_start (&ptd->wheel, timer, session_index,
+                                    ptd->current_time, diff);
+        }
+      else
+        {
+          vec_add1 (expired_sessions_vec, session_index);
+        }
+    }
+
+  return expired_sessions_vec;
+}
+
+/* Expiry "notify_new_sessions" callback: start a timer for each of the len
+ * session indices in new_sessions, using the embryonic timeout of the
+ * session's tenant. */
+static void
+timer_expiry_cb_notify_new_sessions (const u32 *new_sessions, u32 len)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_timer_main_t *t = &sfdp_timer_main;
+  vlib_main_t *vm = vlib_get_main ();
+  u32 tidx = vlib_get_thread_index ();
+  sfdp_timer_per_thread_data_t *ptd =
+    vec_elt_at_index (t->per_thread_data, tidx);
+  const u32 *session_index = new_sessions;
+  f64 time_now = vlib_time_now (vm);
+  ptd->current_time = time_now;
+
+  // Start session timer in embryonic mode
+  while (len)
+    {
+      sfdp_session_t *session = sfdp_session_at_index (*session_index);
+      sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+      sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, session->tenant_idx);
+      sfdp_session_timer_start (&ptd->wheel, timer, *session_index, time_now,
+                                tenant->timeouts[SFDP_TIMEOUT_EMBRYONIC]);
+
+      len--;
+      session_index++;
+    }
+}
+
+/* Expiry "session_remaining_time" callback: recorded expiration minus now. */
+static f64
+timer_expiry_cb_session_remaining_time (sfdp_session_t *session, f64 now)
+{
+  return SFDP_SESSION_TIMER (session)->next_expiration - now;
+}
+
+/* format() helper; expects (sfdp_session_t *session, f64 now) in args and
+ * appends the time left before the recorded expiration. */
+static u8 *
+timer_expiry_cb_format_session_details (u8 *s, va_list *args)
+{
+  sfdp_session_t *session = va_arg (*args, sfdp_session_t *);
+  f64 now = va_arg (*args, f64);
+  sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+  f64 remaining_time = timer->next_expiration - now;
+  s = format (s, "expires after: %fs\n", remaining_time);
+  return s;
+}
+
+/* Register the foreach_sfdp_timeout defaults, then install the timer-based
+ * expiry callbacks. Returns the non-zero result of sfdp_init_timeouts when it
+ * fails, otherwise the result of sfdp_set_expiry_callbacks. */
+u32
+sfdp_timer_register_as_expiry_module (void)
+{
+  sfdp_timeout_t timeouts[SFDP_MAX_TIMEOUTS] = {};
+  int ret;
+  u32 i = 0;
+
+  /* The expansion below writes one array entry per foreach_sfdp_timeout item. */
+  STATIC_ASSERT (SFDP_N_TIMEOUT <= SFDP_MAX_TIMEOUTS,
+                 "foreach_sfdp_timeout has more entries than SFDP_MAX_TIMEOUTS");
+
+#define _(n, v, str) \
+  timeouts[i].name = str; \
+  timeouts[i].val = v; \
+  i++;
+  foreach_sfdp_timeout
+#undef _
+
+  if ((ret = sfdp_init_timeouts (timeouts, i)))
+    {
+      return ret;
+    }
+
+  sfdp_expiry_callbacks_t cbs = {
+    .enable = timer_expiry_cb_enable,
+    .disable = timer_expiry_cb_disable,
+    .expire_or_evict_sessions = timer_expiry_cb_expire_or_evict_sessions,
+    .notify_new_sessions = timer_expiry_cb_notify_new_sessions,
+    .session_remaining_time = timer_expiry_cb_session_remaining_time,
+    .format_session_details = timer_expiry_cb_format_session_details
+  };
+  return sfdp_set_expiry_callbacks (&cbs);
+}
diff --git a/src/vnet/sfdp/timer/timer.h b/src/vnet/sfdp/timer/timer.h
new file mode 100644
index 00000000000..48ef288a113
--- /dev/null
+++ b/src/vnet/sfdp/timer/timer.h
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_timer_h__
+#define __included_sfdp_timer_h__
+#include
+#include
+
+#include
+
+typedef tw_timer_wheel_2t_1w_2048sl_t sfdp_tw_t;
+
+/* Timer state kept for each thread. */
+typedef struct
+{
+  sfdp_tw_t wheel;
+  f64 current_time;      /* refreshed by the expiry callbacks */
+  u32 *expired_sessions; /* vector filled by the wheel expiry callback */
+} sfdp_timer_per_thread_data_t;
+
+typedef struct
+{
+  /* Vector indexed by thread index. */
+  sfdp_timer_per_thread_data_t *per_thread_data;
+} sfdp_timer_main_t;
+
+extern sfdp_timer_main_t sfdp_timer_main;
+
+// Per session state held in sfdp session expiry opaque data
+typedef struct
+{
+  f64 next_expiration; /* absolute time, same clock as vlib_time_now */
+  u32 handle;          /* handle returned by the timer wheel */
+  /* Padding. Deliberately not called __unused: identifiers containing a
+   * double underscore are reserved, and BSD-derived <sys/cdefs.h> defines
+   * __unused as an attribute macro. */
+  u32 unused_pad;
+} __attribute__ ((may_alias)) sfdp_session_timer_t;
+
+/* _ (enum suffix, default value, display name) */
+#define foreach_sfdp_timeout \
+  _ (EMBRYONIC, 5, "embryonic") \
+  _ (ESTABLISHED, 120, "established") \
+  _ (TCP_ESTABLISHED, 3600, "tcp-established") \
+  _ (SECURITY, 30, "security")
+
+typedef enum
+{
+#define _(name, val, str) SFDP_TIMEOUT_##name,
+  foreach_sfdp_timeout
+#undef _
+    SFDP_N_TIMEOUT
+} sfdp_timeout_type_t;
+
+#define SFDP_SESSION_TIMER(session) \
+  SFDP_EXPIRY_SESSION (session, sfdp_session_timer_t)
+
+SFDP_EXPIRY_STATIC_ASSERT_FITS_IN_EXPIRY_OPAQUE (sfdp_session_timer_t);
+
+#define sfdp_timer_start_internal tw_timer_start_2t_1w_2048sl
+#define sfdp_timer_stop_internal tw_timer_stop_2t_1w_2048sl
+#define sfdp_timer_update_internal tw_timer_update_2t_1w_2048sl
+#define sfdp_expire_timers tw_timer_expire_timers_2t_1w_2048sl
+#define SFDP_TIMER_SI_MASK (0x7fffffff)
+#define SFDP_TIMER_INTERVAL ((f64) 1.0) /*in seconds*/
+/* Function-like macros: no whitespace is allowed between the macro name and
+ * the opening parenthesis of its parameter list. */
+#define SFDP_SECONDS_TO_TICKS(seconds) ((seconds) / SFDP_TIMER_INTERVAL)
+#define SFDP_TICKS_TO_SECONDS(ticks) ((ticks) * SFDP_TIMER_INTERVAL)
+
+/* Return the timer data of the thread identified by thread_index. */
+static_always_inline sfdp_timer_per_thread_data_t *
+sfdp_timer_get_per_thread_data (u32 thread_index)
+{
+  return vec_elt_at_index (sfdp_timer_main.per_thread_data, thread_index);
+}
+
+/* Thin wrapper around tw_timer_wheel_init_2t_1w_2048sl. */
+static_always_inline void
+sfdp_tw_init (sfdp_tw_t *tw, void *expired_timer_callback, f64 timer_interval,
+              u32 max_expirations)
+{
+  tw_timer_wheel_init_2t_1w_2048sl (tw, expired_timer_callback, timer_interval,
+                                    max_expirations);
+}
+
+/* Use timer mechanism for expiry.
+ * This must be called while sfdp is not running yet.
+ * Will return 0 on success, -1 otherwise. */
+u32 sfdp_timer_register_as_expiry_module (void);
+
+/* Start a wheel timer for session_index that fires after ticks intervals and
+ * record the handle and the matching absolute expiration time in timer. */
+static_always_inline void
+sfdp_session_timer_start (sfdp_tw_t *tw, sfdp_session_timer_t *timer,
+                          u32 session_index, f64 now, u32 ticks)
+{
+  timer->handle = sfdp_timer_start_internal (tw, session_index, 0, ticks);
+  timer->next_expiration = now + ticks * SFDP_TIMER_INTERVAL;
+}
+
+/* Stop the wheel timer referenced by timer->handle. */
+static_always_inline void
+sfdp_session_timer_stop (sfdp_tw_t *tw, sfdp_session_timer_t *timer)
+{
+  sfdp_timer_stop_internal (tw, timer->handle);
+}
+
+/* Record a new expiration time only; the wheel is not touched (tw is unused),
+ * so the running timer keeps its previous deadline. */
+static_always_inline void
+sfdp_session_timer_update (sfdp_tw_t *tw, sfdp_session_timer_t *timer, f64 now,
+                           u32 ticks)
+{
+  timer->next_expiration = now + ticks * SFDP_TIMER_INTERVAL;
+}
+
+/* Like sfdp_session_timer_update, but when the new expiration is earlier than
+ * the recorded one the wheel timer is moved as well. */
+static_always_inline void
+sfdp_session_timer_update_maybe_past (sfdp_tw_t *tw,
+                                      sfdp_session_timer_t *timer, f64 now,
+                                      u32 ticks)
+{
+  if (timer->next_expiration > now + (ticks * SFDP_TIMER_INTERVAL))
+    sfdp_timer_update_internal (tw, timer->handle, ticks);
+
+  timer->next_expiration = now + ticks * SFDP_TIMER_INTERVAL;
+}
+
+/* Same as sfdp_session_timer_update_maybe_past, with the "earlier" case
+ * wrapped in PREDICT_FALSE. */
+static_always_inline void
+sfdp_session_timer_update_unlikely_past (sfdp_tw_t *tw,
+                                         sfdp_session_timer_t *timer, f64 now,
+                                         u32 ticks)
+{
+  if (PREDICT_FALSE (timer->next_expiration >
+                     now + (ticks * SFDP_TIMER_INTERVAL)))
+    {
+      sfdp_timer_update_internal (tw, timer->handle, ticks);
+    }
+  sfdp_session_timer_update (tw, timer, now, ticks);
+}
+
+/* Reset the length of v and return 0, so the call can close the loop
+ * condition of sfdp_session_index_iterate_expired. */
+static_always_inline uword
+vec_reset_len_return (u32 *v)
+{
+  vec_reset_length (v);
+  return 0;
+}
+
+/* Iterate s over (ptd)->expired_sessions; once the end is reached the vector
+ * length is reset. The loop variable s_ptr is visible in the body. */
+#define sfdp_session_index_iterate_expired(ptd, s) \
+  for (u32 *s_ptr = (ptd)->expired_sessions; \
+       ((s_ptr < vec_end ((ptd)->expired_sessions)) && \
+        (((s) = s_ptr[0]) || 1)) || \
+       vec_reset_len_return ((ptd)->expired_sessions); \
+       s_ptr++)
+
+#endif /* __included_sfdp_timer_h__ */
-- 
2.16.6