sfdp: StateFul Data Plane 69/43869/10
author Mohammed Hawari <[email protected]>
Thu, 9 Oct 2025 15:22:01 +0000 (17:22 +0200)
committer Damjan Marion <[email protected]>
Tue, 21 Oct 2025 15:59:54 +0000 (15:59 +0000)
Change-Id: I229548afbae609f8a15047821e811874aa788a53
Type: feature
Signed-off-by: Mohammed Hawari <[email protected]>
38 files changed:
MAINTAINERS
docs/spelling_wordlist.txt
src/vnet/CMakeLists.txt
src/vnet/sfdp/api.c [new file with mode: 0644]
src/vnet/sfdp/callbacks.c [new file with mode: 0644]
src/vnet/sfdp/callbacks.h [new file with mode: 0644]
src/vnet/sfdp/cli.c [new file with mode: 0644]
src/vnet/sfdp/common.h [new file with mode: 0644]
src/vnet/sfdp/drop/node.c [new file with mode: 0644]
src/vnet/sfdp/expiry/expiry.c [new file with mode: 0644]
src/vnet/sfdp/expiry/expiry.h [new file with mode: 0644]
src/vnet/sfdp/expiry/expiry_cli.c [new file with mode: 0644]
src/vnet/sfdp/format.c [new file with mode: 0644]
src/vnet/sfdp/lookup/full_reass_node.c [new file with mode: 0644]
src/vnet/sfdp/lookup/icmp_error_node.c [new file with mode: 0644]
src/vnet/sfdp/lookup/lookup.h [new file with mode: 0644]
src/vnet/sfdp/lookup/lookup_common.h [new file with mode: 0644]
src/vnet/sfdp/lookup/lookup_inlines.h [new file with mode: 0644]
src/vnet/sfdp/lookup/lookup_ip4.h [new file with mode: 0644]
src/vnet/sfdp/lookup/lookup_ip6.h [new file with mode: 0644]
src/vnet/sfdp/lookup/node.c [new file with mode: 0644]
src/vnet/sfdp/lookup/parser.c [new file with mode: 0644]
src/vnet/sfdp/lookup/parser.h [new file with mode: 0644]
src/vnet/sfdp/lookup/parser_inlines.h [new file with mode: 0644]
src/vnet/sfdp/lookup/reass.c [new file with mode: 0644]
src/vnet/sfdp/lookup/reass.h [new file with mode: 0644]
src/vnet/sfdp/lookup/sfdp_bihashes.h [new file with mode: 0644]
src/vnet/sfdp/lookup/sv_reass_node.c [new file with mode: 0644]
src/vnet/sfdp/service.c [new file with mode: 0644]
src/vnet/sfdp/service.h [new file with mode: 0644]
src/vnet/sfdp/sfdp.api [new file with mode: 0644]
src/vnet/sfdp/sfdp.c [new file with mode: 0644]
src/vnet/sfdp/sfdp.h [new file with mode: 0644]
src/vnet/sfdp/sfdp_funcs.h [new file with mode: 0644]
src/vnet/sfdp/sfdp_types.api [new file with mode: 0644]
src/vnet/sfdp/sfdp_types_funcs.h [new file with mode: 0644]
src/vnet/sfdp/timer/timer.c [new file with mode: 0644]
src/vnet/sfdp/timer/timer.h [new file with mode: 0644]

index 647dd1b..6ba9e56 100644 (file)
@@ -217,6 +217,12 @@ I: span
 M:     N/A
 F:     src/vnet/span
 
+VNET StateFul Data Plane
+I:      sfdp
+M:      Mohammed Hawari <[email protected]>
+M:      Ole Troan <[email protected]>
+F:      src/vnet/sfdp/
+
 Plugin - Crypto - native
 I:     crypto-native
 M:     Damjan Marion <[email protected]>
index 73b26a4..3794cf8 100644 (file)
@@ -1036,6 +1036,8 @@ setjmp
 settingupenvironment
 setUp
 setUpClass
+sfdp
+SFDP
 sflow
 sFlow
 sfr
@@ -1061,6 +1063,7 @@ snat
 socketsvr
 socksvr
 sourceNATing
+sp
 spd
 Specialisations
 spinlock
index 8d62922..5e54ae0 100644 (file)
@@ -1399,6 +1399,55 @@ list(APPEND VNET_API_FILES
   ip6-nd/rd_cp.api
 )
 
+##############################################################################
+# SFDP - StateFul Data Plane Infra
+##############################################################################
+list (APPEND VNET_SOURCES
+  sfdp/lookup/node.c
+  sfdp/lookup/icmp_error_node.c
+  sfdp/lookup/parser.c
+  sfdp/drop/node.c
+  sfdp/format.c
+  sfdp/sfdp.c
+  sfdp/service.c
+  sfdp/cli.c
+  sfdp/api.c
+  sfdp/timer/timer.c
+  sfdp/expiry/expiry.c
+  sfdp/expiry/expiry_cli.c
+  sfdp/callbacks.c
+)
+
+list (APPEND VNET_HEADERS
+  sfdp/sfdp.h
+  sfdp/callbacks.h
+  sfdp/service.h
+  sfdp/sfdp_funcs.h
+  sfdp/sfdp_types_funcs.h
+  sfdp/common.h
+  sfdp/lookup/sfdp_bihashes.h
+  sfdp/lookup/lookup.h
+  sfdp/lookup/lookup_common.h
+  sfdp/lookup/lookup_ip4.h
+  sfdp/lookup/lookup_ip6.h
+  sfdp/lookup/lookup_inlines.h
+  sfdp/lookup/parser.h
+  sfdp/lookup/parser_inlines.h
+  sfdp/timer/timer.h
+  sfdp/expiry/expiry.h
+)
+
+list (APPEND VNET_API_FILES
+  sfdp/sfdp_types.api
+  sfdp/sfdp.api
+)
+
+list (APPEND VNET_MULTIARCH_SOURCES
+  sfdp/lookup/node.c
+  sfdp/lookup/parser.c
+  sfdp/drop/node.c
+)
+
 ##############################################################################
 # VNET Library
 ##############################################################################
diff --git a/src/vnet/sfdp/api.c b/src/vnet/sfdp/api.c
new file mode 100644 (file)
index 0000000..0fab825
--- /dev/null
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/sfdp/sfdp.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/ip/ip_types_api.h>
+#include <vnet/format_fns.h>
+#include <vnet/sfdp/sfdp.api_enum.h>
+#include <vnet/sfdp/sfdp.api_types.h>
+#include <vnet/sfdp/sfdp_types_funcs.h>
+
+#define REPLY_MSG_ID_BASE sfdp->msg_id_base
+#include <vlibapi/api_helper_macros.h>
+
+/*
+ * API handler for sfdp_tenant_add_del: adds or deletes a tenant.
+ *
+ * A context_id of ~0 in the request selects the tenant id as context id.
+ * The reply carries rv = 0 when sfdp_tenant_add_del() returned no error and
+ * rv = -1 otherwise.
+ */
+static void
+vl_api_sfdp_tenant_add_del_t_handler (vl_api_sfdp_tenant_add_del_t *mp)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vl_api_sfdp_tenant_add_del_reply_t *rmp;
+  u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id);
+  /* ~0 has the same representation in both byte orders, so the wire value
+   * can be compared before it is swapped. */
+  u32 context_id =
+    mp->context_id == ~0 ? tenant_id : clib_net_to_host_u32 (mp->context_id);
+  u8 is_del = mp->is_del;
+  clib_error_t *err =
+    sfdp_tenant_add_del (sfdp, tenant_id, context_id, is_del);
+  int rv = err ? -1 : 0;
+
+  /* Only the success/failure status is sent back; release the error object
+   * so that a failed request does not leak it. */
+  clib_error_free (err);
+  REPLY_MACRO (VL_API_SFDP_TENANT_ADD_DEL_REPLY);
+}
+
+/*
+ * API handler for sfdp_set_services: sets the service bitmap of a tenant
+ * for one direction.
+ *
+ * Each entry of mp->services is parsed with unformat_sfdp_service and the
+ * resulting index is OR-ed into the bitmap. The reply carries rv = -1 if any
+ * service string fails to parse or if sfdp_set_services() returns an error,
+ * and rv = 0 otherwise.
+ */
+static void
+vl_api_sfdp_set_services_t_handler (vl_api_sfdp_set_services_t *mp)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  /* Declared up front so that the jump to `fail` does not skip over any
+   * declaration. */
+  vl_api_sfdp_set_services_reply_t *rmp;
+  clib_error_t *err;
+  u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id);
+  sfdp_bitmap_t bitmap = 0;
+  u8 idx = 0;
+  u8 dir = sfdp_api_direction (mp->dir);
+  int rv;
+
+  for (uword i = 0; i < mp->n_services; i++)
+    {
+      char *cstring = (char *) mp->services[i].data;
+      unformat_input_t tmp;
+      uword parsed;
+
+      /* The string may fill its whole buffer without a terminating NUL,
+       * hence the bounded length. */
+      unformat_init_string (&tmp, cstring,
+                            strnlen (cstring, sizeof (mp->services[0].data)));
+      parsed = unformat_user (&tmp, unformat_sfdp_service, &idx);
+      unformat_free (&tmp);
+      if (!parsed)
+        {
+          rv = -1;
+          goto fail;
+        }
+      /* NOTE(review): assumes unformat_sfdp_service only yields indices
+       * below 64 - confirm against its implementation. */
+      bitmap |= (1ULL << idx);
+    }
+
+  err = sfdp_set_services (sfdp, tenant_id, bitmap, dir);
+  rv = err ? -1 : 0;
+  /* Only the status is reported; release the error so it does not leak. */
+  clib_error_free (err);
+fail:
+  REPLY_MACRO (VL_API_SFDP_SET_SERVICES_REPLY);
+}
+
+/*
+ * API handler for sfdp_set_timeout: sets one timeout value of a tenant.
+ *
+ * The reply carries rv = 0 when sfdp_set_timeout() returned no error and
+ * rv = -1 otherwise.
+ */
+static void
+vl_api_sfdp_set_timeout_t_handler (vl_api_sfdp_set_timeout_t *mp)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vl_api_sfdp_set_timeout_reply_t *rmp;
+  u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id);
+  u32 timeout_id = clib_net_to_host_u32 (mp->timeout_id);
+  u32 timeout_value = clib_net_to_host_u32 (mp->timeout_value);
+  clib_error_t *err =
+    sfdp_set_timeout (sfdp, tenant_id, timeout_id, timeout_value);
+  int rv = err ? -1 : 0;
+
+  /* Only the status is reported; release the error so it does not leak. */
+  clib_error_free (err);
+  REPLY_MACRO (VL_API_SFDP_SET_TIMEOUT_REPLY);
+}
+
+/*
+ * API handler for sfdp_set_sp_node: associates a graph node index with one
+ * of the tenant's slow-path node slots.
+ *
+ * The reply carries rv = 0 when sfdp_set_sp_node() returned no error and
+ * rv = -1 otherwise.
+ */
+static void
+vl_api_sfdp_set_sp_node_t_handler (vl_api_sfdp_set_sp_node_t *mp)
+{
+  vl_api_sfdp_set_sp_node_reply_t *rmp;
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id);
+  u8 sp_node = sfdp_api_sp_node (mp->sp_node);
+  u32 node_index = clib_net_to_host_u32 (mp->node_index);
+  clib_error_t *err = sfdp_set_sp_node (sfdp, tenant_id, sp_node, node_index);
+  int rv = err ? -1 : 0;
+
+  /* Only the status is reported; release the error so it does not leak. */
+  clib_error_free (err);
+  REPLY_MACRO (VL_API_SFDP_SET_SP_NODE_REPLY);
+}
+
+/*
+ * API handler for sfdp_set_icmp_error_node: sets the node index used for
+ * ICMP errors of a tenant, for the address family selected by mp->is_ip6.
+ *
+ * The reply carries rv = 0 when sfdp_set_icmp_error_node() returned no error
+ * and rv = -1 otherwise.
+ */
+static void
+vl_api_sfdp_set_icmp_error_node_t_handler (
+  vl_api_sfdp_set_icmp_error_node_t *mp)
+{
+  vl_api_sfdp_set_icmp_error_node_reply_t *rmp;
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 tenant_id = clib_net_to_host_u32 (mp->tenant_id);
+  u8 is_ip6 = mp->is_ip6;
+  u32 node_index = clib_net_to_host_u32 (mp->node_index);
+  clib_error_t *err =
+    sfdp_set_icmp_error_node (sfdp, tenant_id, is_ip6, node_index);
+  int rv = err ? -1 : 0;
+
+  /* Only the status is reported; release the error so it does not leak. */
+  clib_error_free (err);
+  REPLY_MACRO (VL_API_SFDP_SET_ICMP_ERROR_NODE_REPLY);
+}
+
+/*
+ * Translate an internal session state into its API enum counterpart.
+ * Returns -1 for any state that has no API representation.
+ */
+static vl_api_sfdp_session_state_t
+sfdp_session_state_encode (sfdp_session_state_t x)
+{
+  switch (x)
+    {
+    case SFDP_SESSION_STATE_FSOL:
+      return SFDP_API_SESSION_STATE_FSOL;
+    case SFDP_SESSION_STATE_ESTABLISHED:
+      return SFDP_API_SESSION_STATE_ESTABLISHED;
+    case SFDP_SESSION_STATE_TIME_WAIT:
+      return SFDP_API_SESSION_STATE_TIME_WAIT;
+    default:
+      return -1;
+    }
+}
+
+/*
+ * Allocate, fill and send one sfdp_session_details message for `session`.
+ *
+ * The message is sized for sfdp_session_n_keys (session) trailing keys. Key
+ * slot 0 is filled from the primary key and slot 1 from the secondary key,
+ * each encoded as IPv4 and/or IPv6 according to the session key flags.
+ * `context` is copied into the message unchanged.
+ */
+static void
+sfdp_send_session_details (vl_api_registration_t *rp, u32 context,
+                           u32 session_index, u32 thread_index,
+                           sfdp_session_t *session)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vl_api_sfdp_session_details_t *mp;
+  sfdp_session_ip46_key_t skey;
+  f64 now = vlib_time_now (vlib_get_main ());
+  u8 n_keys = sfdp_session_n_keys (session);
+  sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, session->tenant_idx);
+  u32 tenant_id = tenant->tenant_id;
+  size_t msg_size = sizeof (*mp) + sizeof (mp->keys[0]) * n_keys;
+
+  mp = vl_msg_api_alloc_zero (msg_size);
+  mp->_vl_msg_id = ntohs (VL_API_SFDP_SESSION_DETAILS + sfdp->msg_id_base);
+
+  /* Fixed part of the message. */
+  mp->context = context;
+  mp->session_id = clib_host_to_net_u64 (session->session_id);
+  mp->thread_index = clib_host_to_net_u32 (thread_index);
+  mp->tenant_id = clib_host_to_net_u32 (tenant_id);
+  mp->session_idx = clib_host_to_net_u32 (session_index);
+  mp->session_type = sfdp_session_type_encode (session->type);
+  mp->protocol = ip_proto_encode (session->proto);
+  mp->state = sfdp_session_state_encode (session->state);
+  mp->remaining_time =
+    sfdp->expiry_callbacks.session_remaining_time (session, now);
+  mp->forward_bitmap =
+    clib_host_to_net_u64 (session->bitmaps[SFDP_FLOW_FORWARD]);
+  mp->reverse_bitmap =
+    clib_host_to_net_u64 (session->bitmaps[SFDP_FLOW_REVERSE]);
+  mp->n_keys = n_keys;
+
+  /* Variable part: one entry per session key. */
+  for (int i = 0; i < n_keys; i++)
+    {
+      int is_primary = (i == 0);
+      int is_secondary = (i == 1);
+      int has_ip4 =
+        (is_primary &&
+         (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)) ||
+        (is_secondary &&
+         (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4));
+      int has_ip6 =
+        (is_primary &&
+         (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)) ||
+        (is_secondary &&
+         (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6));
+
+      if (has_ip4)
+        {
+          sfdp_normalise_ip4_key (session, &skey.key4, i);
+          sfdp_session_ip46_key_encode (&skey, IP46_TYPE_IP4, &mp->keys[i]);
+        }
+      if (has_ip6)
+        {
+          sfdp_normalise_ip6_key (session, &skey.key6, i);
+          sfdp_session_ip46_key_encode (&skey, IP46_TYPE_IP6, &mp->keys[i]);
+        }
+    }
+
+  vl_api_send_msg (rp, (u8 *) mp);
+}
+
+/*
+ * API handler for sfdp_session_dump: sends one sfdp_session_details message
+ * per session visited by sfdp_foreach_session. Does nothing when the
+ * requesting client has no registration.
+ */
+static void
+vl_api_sfdp_session_dump_t_handler (vl_api_sfdp_session_dump_t *mp)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vl_api_registration_t *rp =
+    vl_api_client_index_to_registration (mp->client_index);
+  sfdp_session_t *session;
+  uword session_index;
+
+  if (!rp)
+    return;
+
+  sfdp_foreach_session (sfdp, session_index, session)
+  {
+    sfdp_send_session_details (rp, mp->context, session_index,
+                               session->owning_thread_index, session);
+  }
+}
+
+/*
+ * Allocate, fill and send one sfdp_tenant_details message for `tenant`.
+ *
+ * The message is sized for SFDP_MAX_TIMEOUTS trailing timeout entries; the
+ * entries visited by sfdp_foreach_timeout are copied from the tenant, the
+ * others stay zero. `context` is copied into the message unchanged.
+ */
+static void
+sfdp_send_tenant_details (vl_api_registration_t *rp, u32 context,
+                          u16 tenant_index, sfdp_tenant_t *tenant)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vl_api_sfdp_tenant_details_t *mp;
+  sfdp_timeout_t *timeout;
+  size_t msg_size =
+    sizeof (*mp) + SFDP_MAX_TIMEOUTS * sizeof (mp->timeout[0]);
+
+  mp = vl_msg_api_alloc_zero (msg_size);
+  mp->_vl_msg_id = ntohs (VL_API_SFDP_TENANT_DETAILS + sfdp->msg_id_base);
+
+  /* Fixed part of the message. */
+  mp->context = context;
+  mp->context_id = clib_host_to_net_u32 (tenant->context_id);
+  mp->index = clib_host_to_net_u32 (tenant_index);
+  mp->forward_bitmap =
+    clib_host_to_net_u64 (tenant->bitmaps[SFDP_FLOW_FORWARD]);
+  mp->reverse_bitmap =
+    clib_host_to_net_u64 (tenant->bitmaps[SFDP_FLOW_REVERSE]);
+  mp->n_timeout = clib_host_to_net_u32 (SFDP_MAX_TIMEOUTS);
+
+  /* Per-timeout values, indexed by position in sfdp->timeouts. */
+  sfdp_foreach_timeout (sfdp, timeout)
+  {
+    u32 idx = timeout - sfdp->timeouts;
+    mp->timeout[idx] = clib_host_to_net_u32 (tenant->timeouts[idx]);
+  }
+
+  vl_api_send_msg (rp, (u8 *) mp);
+}
+
+/*
+ * API handler for sfdp_tenant_dump: sends one sfdp_tenant_details message
+ * per element of the tenant pool. Does nothing when the requesting client
+ * has no registration.
+ */
+static void
+vl_api_sfdp_tenant_dump_t_handler (vl_api_sfdp_tenant_dump_t *mp)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vl_api_registration_t *rp =
+    vl_api_client_index_to_registration (mp->client_index);
+  u16 tenant_index;
+
+  if (!rp)
+    return;
+
+  pool_foreach_index (tenant_index, sfdp->tenants)
+    {
+      sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, tenant_index);
+
+      sfdp_send_tenant_details (rp, mp->context, tenant_index, tenant);
+    }
+}
+
+#include <vnet/sfdp/sfdp.api.c>
+/*
+ * API init hook: records the message id base returned by
+ * setup_message_id_table () in sfdp_main. Always returns 0 (no error).
+ */
+static clib_error_t *
+sfdp_plugin_api_hookup (vlib_main_t *vm)
+{
+  sfdp_main.msg_id_base = setup_message_id_table ();
+  return 0;
+}
+VLIB_API_INIT_FUNCTION (sfdp_plugin_api_hookup);
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sfdp/callbacks.c b/src/vnet/sfdp/callbacks.c
new file mode 100644 (file)
index 0000000..b7eefec
--- /dev/null
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/sfdp/callbacks.h>
+
+sfdp_callback_main_t sfdp_callback_main;
+
+/*
+ * Init function: for every callback type listed in
+ * foreach_sfdp_callback_type, build the vector of effective callbacks
+ * (registered callbacks minus blacklisted ones). Always returns 0.
+ */
+static clib_error_t *
+sfdp_callback_init (vlib_main_t *vm)
+{
+#define _(cb_type, ...) SFDP_CALLBACK_BUILD_EFFECTIVE_LIST (cb_type);
+  foreach_sfdp_callback_type;
+#undef _
+
+  return 0;
+}
+
+VLIB_INIT_FUNCTION (sfdp_callback_init);
\ No newline at end of file
diff --git a/src/vnet/sfdp/callbacks.h b/src/vnet/sfdp/callbacks.h
new file mode 100644 (file)
index 0000000..a9c5f82
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+/* Include guard: the macro defined here must be the one tested by #ifndef. */
+#ifndef __included_callbacks_h
+#define __included_callbacks_h
+#include <vlib/vlib.h>
+
+/*
+ * X-macro listing every SFDP callback type.
+ * Each entry has the form _ (name, return type, argument types...); the
+ * user of the list defines `_` before expanding it and undefines it after.
+ */
+#define foreach_sfdp_callback_type                                            \
+  /* Called by sfdp-lookup after new flows have been created but before       \
+   * packets have been fully processed.                                       \
+   * This gives the opportunity for the SFDP user to initialize               \
+   * per-flow state or even modify the service chain before it gets used. */  \
+  _ (notify_new_sessions, u32, const u32 *, u32)                              \
+  /* Called during pre-input phase to notify that certain flows are being     \
+   * removed. This gives the opportunity for the SFDP user to reset per-flow  \
+   * state while no packet is currently being processed by this thread.       \
+   * This is called before any flow state is removed. */                      \
+  _ (notify_deleted_sessions, u32, const u32 *, u32)
+
+/*
+ * Declare the singly-linked list element type for a given callback
+ * function-pointer type: a `next` link, the callback `fun` and its `name`.
+ */
+#define SFDP_CB_ELT_LIST_TYPE_DECLARE(fn_ptr_type)                            \
+  typedef struct sfdp_cb_elt_list_##fn_ptr_type##_s                           \
+  {                                                                           \
+    struct sfdp_cb_elt_list_##fn_ptr_type##_s *next;                          \
+    fn_ptr_type fun;                                                          \
+    const char *name;                                                         \
+  } sfdp_cb_elt_list_##fn_ptr_type##_t;
+
+/* Name of the list element type declared by SFDP_CB_ELT_LIST_TYPE_DECLARE. */
+#define SFDP_CB_ELT_LIST_TYPE(fn_ptr_type) sfdp_cb_elt_list_##fn_ptr_type##_t
+
+/* Invoke the callback stored in list element `ptr` with arguments `x`. */
+#define SFDP_CALL_CB_ELT(ptr, x...) ((ptr)->fun (x))
+
+#ifndef CLIB_MARCH_VARIANT
+#define SFDP_REGISTER_CALLBACK(type, head, name2)                             \
+  static SFDP_CB_ELT_LIST_TYPE (type)                                         \
+    sfdp_callback_registration_##type_##name2;                                \
+  __attribute__ ((__constructor__)) static void                               \
+    __sfdp_callback_add_registration_##type_##name2 (void)                    \
+                                                                              \
+  {                                                                           \
+    sfdp_callback_main_t *sfdp = &sfdp_callback_main;                         \
+    SFDP_CB_ELT_LIST_TYPE (type) *r =                                         \
+      &sfdp_callback_registration_##type_##name2;                             \
+    r->next = sfdp->head;                                                     \
+    sfdp->head = r;                                                           \
+    r->name = #name2;                                                         \
+  }                                                                           \
+  static SFDP_CB_ELT_LIST_TYPE (type) sfdp_callback_registration_##type_##name2
+
+#define SFDP_BLACKLIST_CALLBACK(type, head, name)                             \
+  __attribute__ ((__constructor__)) static void                               \
+    __sfdp_callback_blacklist_registration_##type_##name (void)               \
+                                                                              \
+  {                                                                           \
+    sfdp_callback_main_t *sfdp = &sfdp_callback_main;                         \
+    vec_add1 (sfdp->blacklist_##head, (const u8 *) #name);                    \
+  }
+#else
+#define SFDP_REGISTER_CALLBACK(type, head, name2)                             \
+  static SFDP_CB_ELT_LIST_TYPE (type)                                         \
+    __clib_unused sfdp_callback_registration_##type_##name2
+#define SFDP_BLACKLIST_CALLBACK(type, head, name)
+#endif
+
+/* One function-pointer typedef sfdp_<name>_cb_t per callback type. */
+#define _(x, y, z...) typedef y (*sfdp_##x##_cb_t) (z);
+foreach_sfdp_callback_type
+#undef _
+
+/* One list element type per callback function-pointer type. */
+#define _(x, ...) SFDP_CB_ELT_LIST_TYPE_DECLARE (sfdp_##x##_cb_t)
+  foreach_sfdp_callback_type
+#undef _
+
+/*
+ * Callback registry. For every callback type <x> it holds:
+ *  - head_<x>: linked list of registered elements (filled by the
+ *    SFDP_REGISTER_CALLBACK constructors),
+ *  - blacklist_head_<x>: vector of blacklisted callback names (filled by
+ *    the SFDP_BLACKLIST_CALLBACK constructors),
+ *  - effective_<x>: vector of element pointers filled by
+ *    SFDP_CALLBACK_BUILD_EFFECTIVE_LIST and walked by SFDP_CALLBACKS_CALL.
+ */
+  typedef struct
+{
+#define _(x, ...)                                                             \
+  SFDP_CB_ELT_LIST_TYPE (sfdp_##x##_cb_t) * head_##x;                         \
+  const u8 **blacklist_head_##x;                                              \
+  SFDP_CB_ELT_LIST_TYPE (sfdp_##x##_cb_t) * *effective_##x;
+  foreach_sfdp_callback_type
+#undef _
+} sfdp_callback_main_t;
+
+/* Single registry instance, defined in callbacks.c. */
+extern sfdp_callback_main_t sfdp_callback_main;
+
+/*
+ * Build sfdp_callback_main.effective_<x> from the registered list head_<x>:
+ * the list is walked from its head and every element whose name does not
+ * match (clib_strncmp, at most 256 characters) an entry of
+ * blacklist_head_<x> is appended to the effective vector.
+ */
+#define SFDP_CALLBACK_BUILD_EFFECTIVE_LIST(x)                                 \
+  do                                                                          \
+    {                                                                         \
+      typeof (sfdp_callback_main.head_##x) hd = sfdp_callback_main.head_##x;  \
+      while (hd != 0)                                                         \
+       {                                                                     \
+         u8 excluded = 0;                                                    \
+         const u8 **cur;                                                     \
+         vec_foreach (cur, sfdp_callback_main.blacklist_head_##x)            \
+           if (!clib_strncmp ((const char *) cur[0], hd->name, 256))         \
+             excluded = 1;                                                   \
+         if (excluded == 0)                                                  \
+           vec_add1 (sfdp_callback_main.effective_##x, hd);                  \
+         hd = hd->next;                                                      \
+       }                                                                     \
+    }                                                                         \
+  while (0)
+
+/*
+ * Invoke, in vector order, every callback of type <x> stored in
+ * sfdp_callback_main.effective_<x>, passing the arguments `y` to each.
+ * Return values of the callbacks are discarded.
+ */
+#define SFDP_CALLBACKS_CALL(x, y...)                                          \
+  do                                                                          \
+    {                                                                         \
+      typeof (sfdp_callback_main.effective_##x) elt;                          \
+      vec_foreach (elt, sfdp_callback_main.effective_##x)                     \
+       SFDP_CALL_CB_ELT (elt[0], y);                                         \
+    }                                                                         \
+  while (0)
+
+/* Per callback type specializations: convenience wrappers that bind the
+ * function-pointer type and registry list of each callback type, so users
+ * only supply the callback name. */
+
+/* notify_new_sessions callbacks */
+#define SFDP_REGISTER_NEW_SESSIONS_CALLBACK(name)                             \
+  SFDP_REGISTER_CALLBACK (sfdp_notify_new_sessions_cb_t,                      \
+                         head_notify_new_sessions, name)
+#define SFDP_BLACKLIST_NEW_SESSIONS_CALLBACK(name)                            \
+  SFDP_BLACKLIST_CALLBACK (sfdp_notify_new_sessions_cb_t,                     \
+                          head_notify_new_sessions, name)
+
+/* notify_deleted_sessions callbacks */
+#define SFDP_REGISTER_DELETED_SESSIONS_CALLBACK(name)                         \
+  SFDP_REGISTER_CALLBACK (sfdp_notify_deleted_sessions_cb_t,                  \
+                         head_notify_deleted_sessions, name)
+#define SFDP_BLACKLIST_DELETED_SESSIONS_CALLBACK(name)                        \
+  SFDP_BLACKLIST_CALLBACK (sfdp_notify_deleted_sessions_cb_t,                 \
+                          head_notify_deleted_sessions, name)
+#endif
diff --git a/src/vnet/sfdp/cli.c b/src/vnet/sfdp/cli.c
new file mode 100644 (file)
index 0000000..b0149bb
--- /dev/null
@@ -0,0 +1,551 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/plugin/plugin.h>
+#include <vnet/vnet.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/service.h>
+
+/*
+ * add CLI:
+ * sfdp tenant <add/del> <tenant-id>
+ *
+ * It creates an entry in the tenant pool. The default service chain in both
+ * directions is "sfdp-drop".
+ *
+ *
+ * add CLI:
+ * set sfdp services tenant <tenant-id> (SERVICE_NAME)+ <forward|reverse>
+ *
+ * configure tenant with a service chain for a given direction (forward or
+ * reverse)
+ *
+ */
+
+/*
+ * CLI handler adding or deleting a tenant.
+ *
+ * Accepted tokens: "add <tenant-id>", "del <tenant-id>" and
+ * "context <context-id>". When no context is given, the tenant id is used
+ * as context id. Returns a parse error on any other token, an error when no
+ * tenant id was given, or whatever sfdp_tenant_add_del () returns.
+ */
+static clib_error_t *
+sfdp_tenant_add_del_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                vlib_cli_command_t *cmd)
+{
+  unformat_input_t line, *li = &line;
+  clib_error_t *error = 0;
+  u32 tenant_id = ~0;
+  u32 context_id = ~0;
+  u8 is_del = 0;
+
+  if (!unformat_user (input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input (li) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (li, "add %d", &tenant_id))
+        {
+          is_del = 0;
+          continue;
+        }
+      if (unformat (li, "del %d", &tenant_id))
+        {
+          is_del = 1;
+          continue;
+        }
+      if (unformat (li, "context %d", &context_id))
+        continue;
+
+      error = unformat_parse_error (li);
+      goto done;
+    }
+
+  if (tenant_id == ~0)
+    {
+      error = clib_error_return (0, "missing tenant id");
+      goto done;
+    }
+
+  /* The context defaults to the tenant id. */
+  if (context_id == ~0)
+    context_id = tenant_id;
+
+  error = sfdp_tenant_add_del (&sfdp_main, tenant_id, context_id, is_del);
+
+done:
+  unformat_free (li);
+  return error;
+}
+
+/*
+ * CLI handler setting the service chain of a tenant for one direction.
+ *
+ * Accepted tokens: "tenant <tenant-id>", a service bitmap (parsed by
+ * unformat_sfdp_service_bitmap) and "forward" or "reverse".
+ * Returns a parse error on any other token, an error when the tenant id or
+ * the direction is missing, or whatever sfdp_set_services () returns.
+ */
+static clib_error_t *
+sfdp_set_services_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                              vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_input_, *line_input = &line_input_;
+  clib_error_t *err = 0;
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 tenant_id = ~0;
+  sfdp_bitmap_t bitmap = 0;
+  u8 direction = ~0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "tenant %d", &tenant_id))
+        ;
+      else if (unformat_user (line_input, unformat_sfdp_service_bitmap,
+                              &bitmap))
+        ;
+      else if (unformat (line_input, "forward"))
+        direction = SFDP_FLOW_FORWARD;
+      else if (unformat (line_input, "reverse"))
+        direction = SFDP_FLOW_REVERSE;
+      else
+        {
+          err = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+  if (tenant_id == ~0)
+    {
+      err = clib_error_return (0, "missing tenant id");
+      goto done;
+    }
+  if (direction == (u8) ~0)
+    {
+      err = clib_error_return (0, "missing direction");
+      goto done;
+    }
+  /* Propagate the result to the CLI so that failures are reported to the
+   * user instead of being dropped. */
+  err = sfdp_set_services (sfdp, tenant_id, bitmap, direction);
+done:
+  unformat_free (line_input);
+  return err;
+}
+
+/*
+ * Fill row `n` of table `t` with one service registration:
+ * column 0 = node name (left aligned), column 1 = index in the service
+ * bitmap (centered), column 2 = "T" for terminal services (centered).
+ * Returns the index of the next row (n + 1).
+ */
+static_always_inline u32
+table_format_insert_sfdp_service (table_t *t,
+                                  sfdp_service_registration_t *service, u32 n)
+{
+  table_format_cell (t, n, 0, "%s", service->node_name);
+  table_set_cell_align (t, n, 0, TTAA_LEFT);
+  table_format_cell (t, n, 1, "%u", *(service->index_in_bitmap));
+  table_set_cell_align (t, n, 1, TTAA_CENTER);
+  table_format_cell (t, n, 2, "%s", (service->is_terminal) ? "T" : "");
+  /* Align the cell that was just written (column 2). */
+  table_set_cell_align (t, n, 2, TTAA_CENTER);
+  return n + 1;
+}
+
+/*
+ * CLI handler printing, for every service scope, a table of the registered
+ * services (node name, bitmap index, terminal flag) followed by the number
+ * of services in that scope. Always returns 0.
+ */
+static clib_error_t *
+sfdp_show_services_fn (vlib_main_t *vm, unformat_input_t *input,
+                       vlib_cli_command_t *cmd)
+{
+  sfdp_service_main_t *vsm = &sfdp_service_main;
+  u32 n_scopes = vec_len (vsm->services_per_scope_index);
+
+  for (u32 scope_index = 0; scope_index < n_scopes; scope_index++)
+    {
+      sfdp_service_registration_t **services =
+        vsm->services_per_scope_index[scope_index];
+      u32 n_services = vec_len (services);
+      table_t tbl = {}, *service_table = &tbl;
+      u32 row = 0;
+
+      table_format_title (service_table,
+                          "Registered SFDP services for scope '%s'",
+                          vsm->scope_names[scope_index]);
+      table_add_header_col (service_table, 3, "Node name", "Index",
+                            "Terminal");
+
+      for (u32 i = 0; i < n_services; i++)
+        row = table_format_insert_sfdp_service (service_table, services[i],
+                                                row);
+
+      vlib_cli_output (vm, "%U", format_table, service_table);
+      vlib_cli_output (vm, "%u / 64 registered services", row);
+      table_free (service_table);
+    }
+
+  return 0;
+}
+
+/*
+ * CLI handler setting one timeout of a tenant.
+ *
+ * Accepted tokens: "tenant <tenant-id>" and "<timeout-name> <value>", the
+ * name being parsed by unformat_sfdp_timeout_name. Returns a parse error on
+ * any other token, an error when the tenant id or the timeout is missing,
+ * or whatever sfdp_set_timeout () returns.
+ */
+static clib_error_t *
+sfdp_set_timeout_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                             vlib_cli_command_t *cmd)
+{
+  unformat_input_t line, *li = &line;
+  clib_error_t *error = 0;
+  u32 tenant_id = ~0;
+  u32 timeout_idx = ~0;
+  u32 timeout_val = ~0;
+
+  if (!unformat_user (input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input (li) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (li, "tenant %d", &tenant_id))
+        continue;
+      if (unformat (li, "%U %d", unformat_sfdp_timeout_name, &timeout_idx,
+                    &timeout_val))
+        continue;
+
+      error = unformat_parse_error (li);
+      goto done;
+    }
+
+  if (tenant_id == ~0)
+    error = clib_error_return (0, "missing tenant id");
+  else if (timeout_idx == ~0)
+    error = clib_error_return (0, "missing timeout");
+  else
+    error = sfdp_set_timeout (&sfdp_main, tenant_id, timeout_idx, timeout_val);
+
+done:
+  unformat_free (li);
+  return error;
+}
+
+/*
+ * CLI handler associating a graph node with a slow-path slot of a tenant.
+ *
+ * Accepted tokens: "tenant <tenant-id>", "node <node-name>" and a slow-path
+ * name parsed by unformat_sfdp_sp_node. Returns a parse error on any other
+ * token, an error when the tenant id, the node or the slow-path is missing
+ * (checked in that order), or whatever sfdp_set_sp_node () returns.
+ */
+static clib_error_t *
+sfdp_set_sp_node_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                             vlib_cli_command_t *cmd)
+{
+  unformat_input_t line, *li = &line;
+  clib_error_t *error = 0;
+  u32 tenant_id = ~0;
+  u32 sp_idx = ~0;
+  u32 node_index = ~0;
+
+  if (!unformat_user (input, unformat_line_input, li))
+    return 0;
+
+  while (unformat_check_input (li) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (li, "tenant %d", &tenant_id))
+        continue;
+      if (unformat (li, "node %U", unformat_vlib_node, vm, &node_index))
+        continue;
+      if (unformat (li, "%U", unformat_sfdp_sp_node, &sp_idx))
+        continue;
+
+      error = unformat_parse_error (li);
+      goto done;
+    }
+
+  if (tenant_id == ~0)
+    error = clib_error_return (0, "missing tenant id");
+  else if (node_index == ~0)
+    error = clib_error_return (0, "missing node");
+  else if (sp_idx == ~0)
+    error = clib_error_return (0, "missing slow-path");
+  else
+    error = sfdp_set_sp_node (&sfdp_main, tenant_id, sp_idx, node_index);
+
+done:
+  unformat_free (li);
+  return error;
+}
+
+/*
+ * CLI handler setting the ICMP error node of a tenant for one address
+ * family.
+ *
+ * Accepted tokens: "tenant <tenant-id>", "node <node-name>" and "ip4" or
+ * "ip6". Returns a parse error on any other token, an error when the tenant
+ * id, the node or the address family is missing, or whatever
+ * sfdp_set_icmp_error_node () returns.
+ */
+static clib_error_t *
+sfdp_set_icmp_error_node_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                     vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_input_, *line_input = &line_input_;
+  clib_error_t *err = 0;
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 tenant_id = ~0;
+  u32 node_index = ~0;
+  /* 0 = not specified, 1 = ip4, 2 = ip6 */
+  u8 ip46 = 0;
+
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return 0;
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (line_input, "tenant %d", &tenant_id))
+        ;
+      else if (unformat (line_input, "node %U", unformat_vlib_node, vm,
+                         &node_index))
+        ;
+      else if (unformat (line_input, "ip4"))
+        ip46 = 1;
+      else if (unformat (line_input, "ip6"))
+        ip46 = 2;
+      else
+        {
+          err = unformat_parse_error (line_input);
+          goto done;
+        }
+    }
+  if (tenant_id == ~0)
+    {
+      err = clib_error_return (0, "missing tenant id");
+      goto done;
+    }
+  if (node_index == ~0)
+    {
+      err = clib_error_return (0, "missing node");
+      goto done;
+    }
+  if (ip46 == 0)
+    {
+      err = clib_error_return (0, "missing address family");
+      goto done;
+    }
+
+  /* ip46 - 1 yields the is_ip6 flag: 0 for ip4, 1 for ip6. */
+  err = sfdp_set_icmp_error_node (sfdp, tenant_id, ip46 - 1, node_index);
+
+done:
+  unformat_free (line_input);
+  return err;
+}
+
+/*
+ * CLI handler printing a table of sessions.
+ *
+ * Optional tokens: "tenant <tenant-id>" restricts the output to one tenant,
+ * "max <n>" bounds the number of displayed sessions (default 20) and
+ * "unsafe-show-all" removes that bound. Returns a parse error on any other
+ * token, or an error when "max 0" is given without "unsafe-show-all".
+ */
+static clib_error_t *
+sfdp_show_sessions_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                               vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_input_, *line_input = &line_input_;
+  clib_error_t *err = 0;
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_session_t *session;
+  u32 session_index;
+  sfdp_tenant_t *tenant;
+  u32 tenant_id = ~0;
+  u32 max_output_value = 20;
+  bool is_show_all = false;
+  f64 now = vlib_time_now (vm);
+
+  if (unformat_user (input, unformat_line_input, line_input))
+    {
+      while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+        {
+          if (unformat (line_input, "tenant %u", &tenant_id))
+            ;
+          else if (unformat (line_input, "max %u", &max_output_value))
+            ;
+          else if (unformat (line_input, "unsafe-show-all"))
+            is_show_all = true;
+          else
+            {
+              err = unformat_parse_error (line_input);
+              break;
+            }
+        }
+      unformat_free (line_input);
+    }
+
+  /* Do not overwrite (and thereby leak) an earlier parse error. */
+  if (!err && !is_show_all && max_output_value == 0)
+    err = clib_error_return (0, "Please specify a positive integer for max");
+
+  if (!err)
+    {
+      table_t session_table_ = {}, *session_table = &session_table_;
+      /* Narrowed to u32 so that the arguments match the %u conversions
+       * used below. */
+      u32 n_total;
+      u32 n = 0;
+      sfdp_table_format_add_header_col (session_table);
+      sfdp_foreach_session (sfdp, session_index, session)
+      {
+        tenant = sfdp_tenant_at_index (sfdp, session->tenant_idx);
+        if (tenant_id != ~0 && tenant_id != tenant->tenant_id)
+          continue;
+        n = sfdp_table_format_insert_session (session_table, n, session_index,
+                                              session, tenant->tenant_id, now);
+
+        if (!is_show_all && n >= max_output_value)
+          break;
+      }
+      vlib_cli_output (vm, "%U", format_table, session_table);
+      n_total = pool_elts (sfdp->sessions);
+      if (n < n_total)
+        {
+          vlib_cli_output (vm, "Only %u sessions displayed, %u ignored", n,
+                           n_total - n);
+        }
+
+      table_free (session_table);
+    }
+
+  return err;
+}
+
+/* CLI handler for "show sfdp session-detail 0x<session-id>".
+ *
+ * Parses a 0x-prefixed hexadecimal session id, looks it up in
+ * sfdp->session_index_by_id and prints the session with
+ * format_sfdp_session_detail. Returns an error when no id is given, when the
+ * id cannot be parsed, or when no session with that id is found.
+ */
+static clib_error_t *
+sfdp_show_session_detail_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                    vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_storage, *line = &line_storage;
+  sfdp_main_t *sfdp = &sfdp_main;
+  clib_bihash_kv_8_8_t lookup = { 0 };
+  clib_error_t *err = 0;
+  f64 now = vlib_time_now (vm);
+  u32 session_index;
+  u64 session_id;
+
+  if (!unformat_user (input, unformat_line_input, line))
+    return clib_error_return (0, "No session id provided");
+
+  /* The id has to be the first token of the line. The error (if any) must be
+   * built before the line input is released. */
+  if (unformat_check_input (line) == UNFORMAT_END_OF_INPUT ||
+      unformat (line, "0x%X", sizeof (session_id), &session_id) == 0)
+    err = unformat_parse_error (line);
+  unformat_free (line);
+  if (err)
+    return err;
+
+  lookup.key = session_id;
+  /* A non-zero search result means the key is absent. */
+  if (clib_bihash_search_inline_8_8 (&sfdp->session_index_by_id, &lookup))
+    return clib_error_return (0, "Session id 0x%llx not found", session_id);
+
+  session_index = sfdp_session_index_from_lookup (lookup.value);
+  vlib_cli_output (vm, "%U", format_sfdp_session_detail, session_index, now);
+  return 0;
+}
+
+/* CLI handler for "show sfdp tenant [<tenant-id> [detail]]".
+ *
+ * Without arguments every tenant in sfdp->tenants is printed. With a tenant
+ * id only tenants carrying that id are printed; the "detail" keyword adds the
+ * output of format_sfdp_tenant_extra. Returns a parse error on unexpected
+ * input.
+ */
+static clib_error_t *
+sfdp_show_tenant_detail_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                   vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_storage, *line = &line_storage;
+  sfdp_main_t *sfdp = &sfdp_main;
+  clib_error_t *err = 0;
+  u32 wanted_id = ~0; /* ~0 means "no filter" */
+  u8 with_extra = 0;
+  u16 tenant_idx;
+
+  if (unformat_user (input, unformat_line_input, line))
+    {
+      /* Stop at the first token that cannot be parsed. */
+      while (!err && unformat_check_input (line) != UNFORMAT_END_OF_INPUT)
+        {
+          if (unformat (line, "%d detail", &wanted_id))
+            with_extra = 1;
+          else if (!unformat (line, "%d", &wanted_id))
+            err = unformat_parse_error (line);
+        }
+      unformat_free (line);
+    }
+
+  if (err)
+    return err;
+
+  pool_foreach_index (tenant_idx, sfdp->tenants)
+    {
+      sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, tenant_idx);
+
+      if (wanted_id == ~0 || tenant->tenant_id == wanted_id)
+        {
+          vlib_cli_output (vm, "Tenant %d", tenant->tenant_id);
+          vlib_cli_output (vm, "  %U", format_sfdp_tenant, sfdp, tenant_idx,
+                           tenant);
+          if (with_extra)
+            vlib_cli_output (vm, "  %U", format_sfdp_tenant_extra, sfdp,
+                             tenant_idx, tenant);
+        }
+    }
+
+  return 0;
+}
+
+/* CLI handler for "show sfdp status".
+ *
+ * Prints the global session counters (maximum, active, free), the eviction
+ * margin, the per-thread cache size and the maximum number of tenants,
+ * followed by one pair of lines per entry of sfdp->per_thread_data.
+ * Always returns NULL.
+ */
+static clib_error_t *
+sfdp_show_sfdp_status_command_fn (vlib_main_t *vm, unformat_input_t *input,
+                                 vlib_cli_command_t *cmd)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_per_thread_data_t *ptd;
+  u64 max_sessions = sfdp_num_sessions ();
+  u64 free_sessions = sfdp_remaining_sessions_in_pool ();
+  u64 active_sessions = sfdp_real_active_sessions ();
+  f64 active_percent = (100.0 * active_sessions) / max_sessions;
+
+  vlib_cli_output (vm, "sfdp status:\n");
+  vlib_cli_output (vm, "  max sessions: %lu\n", max_sessions);
+  vlib_cli_output (vm, "  active sessions: %lu (%.2f%%)\n", active_sessions,
+                   active_percent);
+  vlib_cli_output (vm, "  free sessions: %lu\n", free_sessions);
+  vlib_cli_output (vm, "  eviction sessions margin: %u\n",
+                   sfdp->eviction_sessions_margin);
+  vlib_cli_output (vm, "  max sessions cache per thread: %lu\n",
+                   sfdp_num_sessions_cache_per_thread ());
+  vlib_cli_output (vm, "  max tenants: %llu\n", 1ULL << sfdp->log2_tenants);
+
+  /* Per-thread breakdown; the thread number is the position in the vector. */
+  vec_foreach (ptd, sfdp->per_thread_data)
+    {
+      u32 thread = ptd - sfdp->per_thread_data;
+
+      vlib_cli_output (vm, "    [%u] active sessions: %lu\n", thread,
+                       ptd->n_sessions);
+      vlib_cli_output (vm, "    [%u] cached sessions: %lu\n", thread,
+                       vec_len (ptd->session_freelist));
+    }
+
+  return NULL;
+}
+
+/* CLI registrations. Each entry binds a command path and its short help
+ * text to one of the handler functions of this file. */
+
+/* "sfdp tenant": handled by sfdp_tenant_add_del_command_fn. */
+VLIB_CLI_COMMAND (sfdp_tenant_add_del_command, static) = {
+  .path = "sfdp tenant",
+  .short_help = "sfdp tenant <add|del> <tenant-id> context <context-id>",
+  .function = sfdp_tenant_add_del_command_fn,
+};
+
+/* "set sfdp services": handled by sfdp_set_services_command_fn. */
+VLIB_CLI_COMMAND (sfdp_set_services_command, static) = {
+  .path = "set sfdp services",
+  .short_help = "set sfdp services tenant <tenant-id>"
+               " [SERVICE_NAME]+ <forward|reverse>",
+  .function = sfdp_set_services_command_fn,
+};
+
+/* "show sfdp services": handled by sfdp_show_services_fn. */
+VLIB_CLI_COMMAND (sfdp_show_services_command, static) = {
+  .path = "show sfdp services",
+  .short_help = "show sfdp services",
+  .function = sfdp_show_services_fn,
+};
+
+/* "show sfdp session-table": handled by sfdp_show_sessions_command_fn. */
+VLIB_CLI_COMMAND (show_sfdp_sessions_command, static) = {
+  .path = "show sfdp session-table",
+  .short_help = "show sfdp session-table [tenant <tenant-id>] "
+               "[max <max_value>] [unsafe-show-all]",
+  .function = sfdp_show_sessions_command_fn,
+};
+
+/* "show sfdp session-detail": handled by
+ * sfdp_show_session_detail_command_fn. */
+VLIB_CLI_COMMAND (show_sfdp_detail_command, static) = {
+  .path = "show sfdp session-detail",
+  .short_help = "show sfdp session-detail 0x<session-id>",
+  .function = sfdp_show_session_detail_command_fn,
+};
+
+/* "show sfdp tenant": handled by sfdp_show_tenant_detail_command_fn. */
+VLIB_CLI_COMMAND (show_sfdp_tenant, static) = {
+  .path = "show sfdp tenant",
+  .short_help = "show sfdp tenant [<tenant-id> [detail]]",
+  .function = sfdp_show_tenant_detail_command_fn,
+};
+
+/* "show sfdp status": handled by sfdp_show_sfdp_status_command_fn. */
+VLIB_CLI_COMMAND (sfdp_show_sfdp_status_command, static) = {
+  .path = "show sfdp status",
+  .short_help = "show sfdp status",
+  .function = sfdp_show_sfdp_status_command_fn,
+};
+
+/* "set sfdp timeout": handled by sfdp_set_timeout_command_fn. */
+VLIB_CLI_COMMAND (sfdp_set_timeout_command, static) = {
+  .path = "set sfdp timeout",
+  .short_help = "set sfdp timeout tenant <tenant-id>"
+               " <timeout-name> <timeout-value>",
+  .function = sfdp_set_timeout_command_fn
+};
+
+/* "set sfdp sp-node": handled by sfdp_set_sp_node_command_fn. */
+VLIB_CLI_COMMAND (sfdp_set_sp_node_command, static) = {
+  .path = "set sfdp sp-node",
+  .short_help = "set sfdp sp-node tenant <tenant-id>"
+               " <sp-name> node <node-name>",
+  .function = sfdp_set_sp_node_command_fn
+};
+
+/* "set sfdp icmp-error-node": handled by
+ * sfdp_set_icmp_error_node_command_fn. */
+VLIB_CLI_COMMAND (sfdp_set_icmp_error_node_command, static) = {
+  .path = "set sfdp icmp-error-node",
+  .short_help = "set sfdp icmp-error-node tenant <tenant-id>"
+               " <ip4|ip6> node <node-name>",
+  .function = sfdp_set_icmp_error_node_command_fn
+};
diff --git a/src/vnet/sfdp/common.h b/src/vnet/sfdp/common.h
new file mode 100644 (file)
index 0000000..53ecb39
--- /dev/null
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_common_h__
+#define __included_sfdp_common_h__
+
+#include <vnet/vnet.h>
+
+/* X-macro listing the SFDP per-buffer flags as (symbol, name string) pairs.
+ * It is expanded below to generate the bit positions and the bit masks. */
+#define foreach_sfdp_buffer_flag                                              \
+  _ (SV_REASSEMBLED, "sv_reassembled")                                        \
+  _ (FULL_REASSEMBLED, "full_reassembled")                                    \
+  _ (IP6_FINAL_PROTO_VALID, "ip6_final_proto_valid")
+
+/* Bit positions: one SFDP_BUFFER_FLAG_BIT_<sym> enumerator per flag, numbered
+ * from 0 in list order. */
+enum
+{
+#define _(sym, str) SFDP_BUFFER_FLAG_BIT_##sym,
+  foreach_sfdp_buffer_flag
+#undef _
+};
+
+/* Bit masks: SFDP_BUFFER_FLAG_<sym> is 1 shifted left by the matching
+ * SFDP_BUFFER_FLAG_BIT_<sym>. */
+enum
+{
+#define _(sym, str) SFDP_BUFFER_FLAG_##sym = 0x1 << SFDP_BUFFER_FLAG_BIT_##sym,
+  foreach_sfdp_buffer_flag
+#undef _
+};
+/* 64-bit bitmap type; used below for the per-buffer service bitmap. */
+typedef u64 sfdp_bitmap_t;
+/* 16-bit session version counter type. */
+typedef u16 session_version_t;
+/* SFDP per-buffer metadata. It is accessed through the sfdp_buffer() and
+ * sfdp_buffer2() macros, which overlay it on the "unused" area of the vnet
+ * buffer opaque data (hence the may_alias attribute).
+ * NOTE(review): the field descriptions below are inferred from the field
+ * names only - confirm against the lookup and service nodes. */
+typedef struct
+{
+  /* presumably the set of services still to be run on the packet */
+  sfdp_bitmap_t service_bitmap;
+  /* presumably the index of the tenant the packet belongs to */
+  u16 tenant_index;
+  /* presumably the session version sampled before a thread handoff */
+  session_version_t session_version_before_handoff;
+  /* presumably a combination of SFDP_BUFFER_FLAG_* masks */
+  u8 flags;
+  u8 tcp_flags;
+  u8 ip6_final_proto;
+} __attribute__ ((may_alias)) sfdp_buffer_opaque_t;
+
+/* Compile-time guarantee that the SFDP metadata fits in the "unused" area of
+ * the vnet buffer opaque data it is overlaid on. */
+STATIC_ASSERT (sizeof (sfdp_buffer_opaque_t) <=
+                sizeof (vnet_buffer ((vlib_buffer_t *) 0)->unused),
+              "size of sfdp_buffer_opaque_t must be <= size of "
+              "vnet_buffer_opaque_t->unused");
+
+/* Returns the SFDP metadata stored in the "unused" area of vnet_buffer (b). */
+#define sfdp_buffer(b) ((sfdp_buffer_opaque_t *) vnet_buffer (b)->unused)
+
+/* Sometimes an SFDP packet needs to undergo an excursion outside of SFDP
+ * (e.g., for reassembly). This second area, located in vnet_buffer2 (b), is
+ * used to save the SFDP metadata during this excursion.
+ */
+#define sfdp_buffer2(b) ((sfdp_buffer_opaque_t *) vnet_buffer2 (b)->unused)
+
+#endif
diff --git a/src/vnet/sfdp/drop/node.c b/src/vnet/sfdp/drop/node.c
new file mode 100644 (file)
index 0000000..cb41be8
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/sfdp/service.h>
+/* X-macro listing the sfdp-drop error counters as (symbol, string) pairs. */
+#define foreach_sfdp_drop_error _ (DROP, "drop")
+
+/* Error counter indices, SFDP_DROP_ERROR_<sym>; SFDP_DROP_N_ERROR is the
+ * number of counters. */
+typedef enum
+{
+#define _(sym, str) SFDP_DROP_ERROR_##sym,
+  foreach_sfdp_drop_error
+#undef _
+    SFDP_DROP_N_ERROR,
+} sfdp_drop_error_t;
+
+/* Error counter strings, in the same order as sfdp_drop_error_t. */
+static char *sfdp_drop_error_strings[] = {
+#define _(sym, string) string,
+  foreach_sfdp_drop_error
+#undef _
+};
+
+/* X-macro listing the next nodes of sfdp-drop as (symbol, node name) pairs. */
+#define foreach_sfdp_drop_next _ (DROP, "error-drop")
+
+/* Next-node indices, SFDP_DROP_NEXT_<sym>; SFDP_DROP_N_NEXT is the number of
+ * next nodes. */
+typedef enum
+{
+#define _(n, x) SFDP_DROP_NEXT_##n,
+  foreach_sfdp_drop_next
+#undef _
+    SFDP_DROP_N_NEXT
+} sfdp_drop_next_t;
+
+/* Per-packet trace record of the sfdp-drop node. */
+typedef struct
+{
+  /* Copy of the buffer's flow_id; the formatter prints flow_id >> 1 as the
+   * session and uses the lowest bit to tell forward (0) from reverse (1). */
+  u32 flow_id;
+} sfdp_drop_trace_t;
+
+/* Trace formatter of the sfdp-drop node.
+ * Variadic arguments: vlib_main_t *, vlib_node_t *, sfdp_drop_trace_t *.
+ * Prints the flow id, the session derived from it (flow id >> 1) and the
+ * direction given by its lowest bit.
+ */
+static u8 *
+format_sfdp_drop_trace (u8 *s, va_list *args)
+{
+  /* The first two arguments are not used but must still be consumed. */
+  vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *);
+  sfdp_drop_trace_t *trace = va_arg (*args, sfdp_drop_trace_t *);
+  u32 flow_id = trace->flow_id;
+  char *direction = (flow_id & 0x1) ? "reverse" : "forward";
+
+  return format (s, "sfdp-drop: flow-id %u (session %u, %s)", flow_id,
+                 flow_id >> 1, direction);
+}
+
+/* sfdp-drop node function: sends every buffer of the frame to the
+ * SFDP_DROP_NEXT_DROP next node and accounts for them in the DROP counter.
+ * Returns the number of buffers in the frame.
+ */
+VLIB_NODE_FN (sfdp_drop_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  u32 *buffer_indices = vlib_frame_vector_args (frame);
+  u32 n_packets = frame->n_vectors;
+
+  vlib_buffer_enqueue_to_single_next (vm, node, buffer_indices,
+                                      SFDP_DROP_NEXT_DROP, n_packets);
+  vlib_node_increment_counter (vm, node->node_index, SFDP_DROP_ERROR_DROP,
+                               n_packets);
+
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
+      int i = 0;
+
+      vlib_get_buffers (vm, buffer_indices, bufs, n_packets);
+
+      /* Tracing stops at the first buffer that is not traced: only the
+       * leading run of traced buffers gets a trace record. */
+      while (i < n_packets && (bufs[i]->flags & VLIB_BUFFER_IS_TRACED))
+        {
+          sfdp_drop_trace_t *t =
+            vlib_add_trace (vm, node, bufs[i], sizeof (*t));
+          t->flow_id = bufs[i]->flow_id;
+          i++;
+        }
+    }
+
+  return frame->n_vectors;
+}
+
+/* Registration of the "sfdp-drop" internal node: u32 buffer indices as frame
+ * vector, the error strings and next nodes generated from the X-macros
+ * above, and format_sfdp_drop_trace as trace formatter. */
+VLIB_REGISTER_NODE (sfdp_drop_node) = {
+  .name = "sfdp-drop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_drop_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN (sfdp_drop_error_strings),
+  .error_strings = sfdp_drop_error_strings,
+
+  .n_next_nodes = SFDP_DROP_N_NEXT,
+  .next_nodes = {
+#define _(n, x) [SFDP_DROP_NEXT_##n] = x,
+          foreach_sfdp_drop_next
+#undef _
+  }
+
+};
+
+/* Declares the "drop" SFDP service, implemented by the sfdp-drop node. It is
+ * marked terminal and declares no ordering constraint (both runs_before and
+ * runs_after are given as SFDP_SERVICES (0)). */
+SFDP_SERVICE_DEFINE (drop) = { .node_name = "sfdp-drop",
+                              .runs_before = SFDP_SERVICES (0),
+                              .runs_after = SFDP_SERVICES (0),
+                              .is_terminal = 1 };
\ No newline at end of file
diff --git a/src/vnet/sfdp/expiry/expiry.c b/src/vnet/sfdp/expiry/expiry.c
new file mode 100644 (file)
index 0000000..2d8676e
--- /dev/null
@@ -0,0 +1,165 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/sfdp/expiry/expiry.h>
+
+#include <vlib/vlib.h>
+#include <vnet/sfdp/sfdp_funcs.h>
+
+/* Non-zero while expiry is enabled; the setup functions below refuse to run
+ * in that state. */
+static u8 expiry_is_enabled = 0;
+
+/* Installs the expiry module callbacks by copying *callbacks into
+ * sfdp_main.expiry_callbacks.
+ * Returns 0 on success, -1 if expiry is currently enabled (nothing is
+ * changed in that case).
+ */
+int
+sfdp_set_expiry_callbacks (const sfdp_expiry_callbacks_t *callbacks)
+{
+  if (expiry_is_enabled)
+    return -1;
+
+  clib_memcpy (&sfdp_main.expiry_callbacks, callbacks, sizeof (*callbacks));
+  return 0;
+}
+
+/* Provides the initial timeout table to sfdp.
+ *
+ * sfdp->timeouts is cleared, then the first n entries are overwritten with
+ * the caller's timeouts.
+ *
+ * timeouts: array of n timeout descriptions (may be NULL when n is 0).
+ * n:        number of entries in timeouts.
+ *
+ * Returns 0 on success, -1 if expiry is currently enabled or if n entries do
+ * not fit in sfdp->timeouts (nothing is changed in either case).
+ */
+int
+sfdp_init_timeouts (const sfdp_timeout_t *timeouts, u32 n)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  if (expiry_is_enabled)
+    {
+      return -1;
+    }
+  /* sfdp->timeouts has a fixed size: refuse anything that would overflow it
+   * instead of writing past its end. */
+  if (n > sizeof (sfdp->timeouts) / sizeof (*timeouts))
+    {
+      return -1;
+    }
+  clib_memset (sfdp->timeouts, 0, sizeof (sfdp->timeouts));
+  /* Nothing to copy for an empty table; this also keeps a NULL source
+   * pointer away from memcpy. */
+  if (n > 0)
+    {
+      clib_memcpy (sfdp->timeouts, timeouts, sizeof (*timeouts) * n);
+    }
+  return 0;
+}
+
+/* Sets the state of the "sfdp-expire" node on every thread: disabled when
+ * is_disable is non-zero, polling otherwise. Thread 0 is left untouched when
+ * skip_main is non-zero.
+ */
+void
+sfdp_enable_disable_expiry_node (u8 is_disable, int skip_main)
+{
+  u32 n_threads = vlib_num_workers () + 1;
+  int thread = skip_main ? 1 : 0;
+
+  while (thread < n_threads)
+    {
+      vlib_main_t *thread_vm = vlib_get_main_by_index (thread);
+      vlib_node_t *expire_node =
+        vlib_get_node_by_name (thread_vm, (u8 *) "sfdp-expire");
+
+      if (is_disable)
+        vlib_node_set_state (thread_vm, expire_node->index,
+                             VLIB_NODE_STATE_DISABLED);
+      else
+        vlib_node_set_state (thread_vm, expire_node->index,
+                             VLIB_NODE_STATE_POLLING);
+      thread++;
+    }
+}
+
+/* Enables (is_disable == 0) or disables (is_disable != 0) expiry.
+ *
+ * Enabling: the module's enable callback runs first, then the sfdp-expire
+ * node is started. Disabling: the node is stopped first, then the module's
+ * disable callback runs. The main thread is skipped when sfdp->no_main is
+ * set.
+ */
+void
+sfdp_enable_disable_expiry (u8 is_disable)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+
+  if (is_disable)
+    {
+      /* Stop the node, then de-init the module last */
+      sfdp_enable_disable_expiry_node (is_disable, sfdp->no_main);
+      sfdp->expiry_callbacks.disable ();
+      expiry_is_enabled = false;
+    }
+  else
+    {
+      /* Init the module first, then start the node */
+      expiry_is_enabled = true;
+      sfdp->expiry_callbacks.enable ();
+      sfdp_enable_disable_expiry_node (is_disable, sfdp->no_main);
+    }
+}
+
+/* X-macro listing the sfdp-expire counters as
+ * (symbol, counter name, severity, description) tuples. */
+#define foreach_sfdp_expire_error                                             \
+  _ (NODE_CALLED, "node-called", INFO, "node called")                         \
+  _ (EXPIRED, "expired", INFO, "session expired")                             \
+  _ (REQUESTED_EVICTION, "requested-eviction", INFO, "requested eviction")
+
+/* Counter indices, SFDP_EXPIRE_ERROR_<sym>; SFDP_EXPIRE_N_ERROR is the
+ * number of counters. */
+typedef enum
+{
+#define _(sym, name, sev, str) SFDP_EXPIRE_ERROR_##sym,
+  foreach_sfdp_expire_error
+#undef _
+    SFDP_EXPIRE_N_ERROR,
+} sfdp_expire_error_t;
+
+/* Counter descriptors, in the same order as sfdp_expire_error_t. */
+static vlib_error_desc_t sfdp_expire_error_descriptors[] = {
+#define _(sym, name, sev, str) { name, str, VL_COUNTER_SEVERITY_##sev },
+  foreach_sfdp_expire_error
+#undef _
+};
+
+/* sfdp-expire node function.
+ *
+ * Asks the expiry module for the sessions to expire on this thread,
+ * requesting evictions when fewer sessions than the configured margin remain
+ * available, then notifies the deletion of the returned sessions and removes
+ * them. Always returns 0.
+ */
+VLIB_NODE_FN (sfdp_expire_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 thread_index = vm->thread_index;
+  sfdp_per_thread_data_t *ptd =
+    vec_elt_at_index (sfdp->per_thread_data, thread_index);
+  u32 n_available = sfdp_sessions_available_for_this_thread (ptd);
+  u32 desired_evictions = 0;
+  u32 *expired_index;
+
+  /* Request as many evictions as needed to get back above the margin. */
+  if (n_available < sfdp->eviction_sessions_margin)
+    desired_evictions = sfdp->eviction_sessions_margin - n_available;
+
+  /* Calling callback for expiries or evictions; the vector may be
+   * reallocated, hence the re-assignment. */
+  ptd->expired_sessions = sfdp->expiry_callbacks.expire_or_evict_sessions (
+    desired_evictions, ptd->expired_sessions);
+
+  vlib_node_increment_counter (vm, node->node_index,
+                               SFDP_EXPIRE_ERROR_NODE_CALLED, 1);
+  vlib_node_increment_counter (vm, node->node_index,
+                               SFDP_EXPIRE_ERROR_REQUESTED_EVICTION,
+                               desired_evictions);
+
+  if (vec_len (ptd->expired_sessions) != 0)
+    {
+      sfdp_notify_deleted_sessions (sfdp, ptd->expired_sessions,
+                                    vec_len (ptd->expired_sessions));
+
+      vec_foreach (expired_index, ptd->expired_sessions)
+        {
+          sfdp_session_t *session = sfdp_session_at_index (*expired_index);
+          sfdp_session_remove (sfdp, ptd, session, thread_index,
+                               *expired_index);
+        }
+
+      vlib_node_increment_counter (vm, node->node_index,
+                                   SFDP_EXPIRE_ERROR_EXPIRED,
+                                   vec_len (ptd->expired_sessions));
+      vec_reset_length (ptd->expired_sessions);
+    }
+
+  /* TODO: some logic so that we are not called too often */
+  return 0;
+}
+
+/* Sets sfdp->eviction_sessions_margin.
+ *
+ * margin: requested margin, or ~0 to select
+ *         SFDP_DEFAULT_EVICTION_SESSIONS_MARGIN capped at half of
+ *         sfdp_num_sessions ().
+ *
+ * Returns 0 on success, or an error when an explicit margin exceeds half of
+ * sfdp_num_sessions () (the current margin is kept in that case).
+ */
+clib_error_t *
+sfdp_set_eviction_sessions_margin (u32 margin)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 half_table = sfdp_num_sessions () / 2;
+
+  if (margin == ~0)
+    {
+      /* Default value: silently capped, so it can never be rejected. */
+      margin = SFDP_DEFAULT_EVICTION_SESSIONS_MARGIN;
+      if (margin > half_table)
+        margin = half_table;
+    }
+  else if (margin > half_table)
+    {
+      return clib_error_return (
+        0, "Cannot set a margin greater than half the flow table !");
+    }
+
+  sfdp->eviction_sessions_margin = margin;
+  return 0;
+}
+
+/* Registration of the "sfdp-expire" input node. It is registered in the
+ * disabled state; sfdp_enable_disable_expiry_node () switches it between
+ * polling and disabled. */
+VLIB_REGISTER_NODE (sfdp_expire_node) = {
+  .name = "sfdp-expire",
+  .type = VLIB_NODE_TYPE_INPUT,
+  .n_errors = SFDP_EXPIRE_N_ERROR,
+  .error_counters = sfdp_expire_error_descriptors,
+  .state = VLIB_NODE_STATE_DISABLED
+};
diff --git a/src/vnet/sfdp/expiry/expiry.h b/src/vnet/sfdp/expiry/expiry.h
new file mode 100644 (file)
index 0000000..1a0b052
--- /dev/null
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_expiry_h__
+#define __included_sfdp_expiry_h__
+#include <vppinfra/format.h>
+#include <vppinfra/types.h>
+
+/* Default number of remaining sessions below which eviction is requested
+ * from the expiry module (256 * 256 sessions). */
+#define SFDP_DEFAULT_EVICTION_SESSIONS_MARGIN (256 * 256) /* 256 vectors */
+
+/* Defined in sfdp.h, but needed in the callback function declarations below;
+ * only forward-declared here. */
+typedef struct sfdp_session sfdp_session_t;
+typedef struct sfdp_timeout sfdp_timeout_t;
+
+/* Defines the callbacks used by sfdp to call the expiry module. */
+typedef struct
+{
+  /* Called by sfdp when it's being enabled.
+   * The expiry module shouldn't do anything before this is called. */
+  void (*enable) ();
+
+  /* Called by sfdp when it's being disabled.
+   * The expiry module shouldn't do anything after this is called. */
+  void (*disable) ();
+
+  /* Called by sfdp on every pre-input step, on every worker thread.
+   * Provides an opportunity for the session expiry module to timeout flows,
+   * but also for sfdp to request a specific number of flows to be evicted.
+   * This is best-effort: the expiry module may add fewer, or more, sessions
+   * than the requested number.
+   *
+   * desired_expiries:     number of requested flow expiries to be added to
+   *                       the vector.
+   * expired_sessions_vec: vec pointer to be filled with expired sessions.
+   * return:               updated expired_sessions_vec (a resize may change
+   *                       the vector pointer value).
+   *
+   * Note: Upon placing a session index in expired_sessions_vec, the expiry
+   *       module shall have freed any associated resources, as sfdp will
+   *       definitely free the session.
+   */
+  u32 *(*expire_or_evict_sessions) (u32 desired_expiries,
+                                   u32 *expired_sessions_vec);
+
+  /* Called by sfdp-lookup after a new session entry is created,
+   * but before the first packet gets processed with it.
+   * This gives the opportunity for the session expiry module to initialize
+   * per-flow state before the packet is processed by any service. */
+  void (*notify_new_sessions) (const u32 *new_sessions, u32 len);
+
+  /* Shall return the flow's remaining time to live.
+   * Used by CLI table dump and API. */
+  f64 (*session_remaining_time) (sfdp_session_t *session, f64 now);
+
+  /* Shall format the session expiry information details.
+   * The variadic arguments used are:
+   * - sfdp_session_t *session
+   * - f64 now
+   * Note: If printed on more than one line, use provided indentation.
+   */
+  u8 *(*format_session_details) (u8 *s, va_list *args);
+
+} sfdp_expiry_callbacks_t;
+
+/* Compile-time check that `type` (the expiry module's per-session data) fits
+ * in sfdp_session_t::expiry_opaque.
+ * Note: the expansion already ends with a semicolon. */
+#define SFDP_EXPIRY_STATIC_ASSERT_FITS_IN_EXPIRY_OPAQUE(type)                 \
+  STATIC_ASSERT (sizeof (type) <=                                             \
+                  sizeof (((sfdp_session_t *) (0))->expiry_opaque),          \
+                #type " too big to fit in expiry_opaque");
+
+/* Casts the pointer returned by sfdp_get_session_expiry_opaque (session)
+ * into a pointer to the provided type. */
+#define SFDP_EXPIRY_SESSION(session, type)                                    \
+  ((type *) (sfdp_get_session_expiry_opaque (session)))
+
+/** Sets the expiry callbacks.
+ *
+ *  Returns 0 upon success, or a different value on failure, e.g. if called
+ *  while sfdp is already enabled.
+ */
+int sfdp_set_expiry_callbacks (const sfdp_expiry_callbacks_t *callbacks);
+
+/** Provides initial timeout names and defaults to sfdp-core.
+ *
+ *  Returns 0 upon success, or a different value on failure, e.g. if called
+ *  while sfdp is already enabled.
+ */
+int sfdp_init_timeouts (const sfdp_timeout_t *timeouts, u32 n);
+
+/** Called by sfdp when enabling/disabling expiry. */
+void sfdp_enable_disable_expiry (u8 is_disable);
+
+/** Called by sfdp_enable_disable_expiry to set the sfdp-expire node
+ *  to disabled or polling state. */
+void sfdp_enable_disable_expiry_node (u8 is_disable, int skip_main);
+
+/** Sets the sessions-count margin used to enable flow eviction.
+ *
+ *  Once the number of remaining available sessions passes below the margin,
+ *  the expiry module will be asked to remove existing sessions.
+ *
+ *  The value used depends on the expiry module implementation. If the
+ *  expiry module can synchronously delete all the needed sessions, then
+ *  the value shall be equal to the maximum number of new sessions that
+ *  can be processed in a single VPP loop.
+ *  Otherwise, a greater value shall be used, so as to leave enough time
+ *  for the expiry module to evict flows without taking the risk of running
+ *  out of flow entries.
+ *
+ *  This function accepts ~0, which will set the margin to a default value.
+ */
+clib_error_t *sfdp_set_eviction_sessions_margin (u32 margin);
+
+void sfdp_check_eviction_sessions_margin ();
+
+#endif /* __included_sfdp_expiry_h__ */
diff --git a/src/vnet/sfdp/expiry/expiry_cli.c b/src/vnet/sfdp/expiry/expiry_cli.c
new file mode 100644 (file)
index 0000000..9c6ae15
--- /dev/null
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/sfdp/sfdp.h>
+
+#include <vlib/vlib.h>
+
+/* CLI handler for "set sfdp eviction sessions-margin <n-sessions>".
+ *
+ * Parses the margin and applies it with
+ * sfdp_set_eviction_sessions_margin (). Returns an error when the margin is
+ * missing (including an empty command line), when the input cannot be
+ * parsed, or when the setter rejects the value.
+ */
+static clib_error_t *
+sfdp_set_eviction_sessions_margin_fn (vlib_main_t *vm, unformat_input_t *input,
+                                     vlib_cli_command_t *cmd)
+{
+  unformat_input_t line_input_, *line_input = &line_input_;
+  clib_error_t *err = 0;
+  u32 eviction_sessions_margin = ~0; /* ~0 means "not provided" */
+
+  /* The margin is mandatory: an empty command line is an error rather than
+   * a silent no-op. */
+  if (!unformat_user (input, unformat_line_input, line_input))
+    return clib_error_return (0, "Missing margin value");
+
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (!unformat (line_input, "%u", &eviction_sessions_margin))
+        {
+          /* Build the error before the line input is released. */
+          err = unformat_parse_error (line_input);
+          break;
+        }
+    }
+  unformat_free (line_input);
+
+  if (err)
+    return err;
+
+  if (eviction_sessions_margin == ~0)
+    return clib_error_return (0, "Missing margin value");
+
+  return sfdp_set_eviction_sessions_margin (eviction_sessions_margin);
+}
+
+/* "set sfdp eviction sessions-margin": handled by
+ * sfdp_set_eviction_sessions_margin_fn. */
+VLIB_CLI_COMMAND (set_eviction_sessions_margin, static) = {
+  .path = "set sfdp eviction sessions-margin",
+  .short_help = "set sfdp eviction sessions-margin <n-sessions>",
+  .function = sfdp_set_eviction_sessions_margin_fn
+};
+
+/* CLI handler for "test sfdp expiry disable": sets the sfdp-expire node to
+ * the disabled state on every thread, main thread included. Only the node
+ * state is changed; sfdp_enable_disable_expiry () is not involved, so the
+ * expiry callbacks are not called. Takes no argument and always returns
+ * NULL. */
+static clib_error_t *
+test_sfdp_expiry_disable_fn (vlib_main_t *vm, unformat_input_t *input,
+                            vlib_cli_command_t *cmd)
+{
+  /* The handler arguments are intentionally unused. */
+  (void) vm;
+  (void) input;
+  (void) cmd;
+  sfdp_enable_disable_expiry_node (true /* is_disable */,
+                                  false /* skip main*/);
+  return NULL;
+}
+
+/** Command used to force disable expiry in tests. */
+VLIB_CLI_COMMAND (test_sfdp_expiry_disable, static) = {
+  .path = "test sfdp expiry disable",
+  .short_help = "[TEST ONLY] disable sfdp-expiry node",
+  .function = test_sfdp_expiry_disable_fn
+};
+
+/* CLI handler for "test sfdp expiry enable": sets the sfdp-expire node back
+ * to the polling state on every thread, main thread included. Only the node
+ * state is changed; sfdp_enable_disable_expiry () is not involved, so the
+ * expiry callbacks are not called. Takes no argument and always returns
+ * NULL. */
+static clib_error_t *
+test_sfdp_expiry_enable_fn (vlib_main_t *vm, unformat_input_t *input,
+                           vlib_cli_command_t *cmd)
+{
+  /* The handler arguments are intentionally unused. */
+  (void) vm;
+  (void) input;
+  (void) cmd;
+  sfdp_enable_disable_expiry_node (false /* is_disable */,
+                                  false /* skip main*/);
+  return NULL;
+}
+
+/** Command used to enable-back expiry in tests. */
+VLIB_CLI_COMMAND (test_sfdp_expiry_enable, static) = {
+  .path = "test sfdp expiry enable",
+  .short_help = "[TEST ONLY] enable sfdp-expiry node",
+  .function = test_sfdp_expiry_enable_fn
+};
diff --git a/src/vnet/sfdp/format.c b/src/vnet/sfdp/format.c
new file mode 100644 (file)
index 0000000..4891741
--- /dev/null
@@ -0,0 +1,478 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/sfdp/service.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/parser.h>
+#include <vppinfra/format_table.h>
+/* Formats a session state as its name.
+ * Variadic argument: the state, passed as a u32.
+ * The name is looked up through foreach_sfdp_session_state; nothing is
+ * appended when the value matches none of the listed states. */
+u8 *
+format_sfdp_session_state (u8 *s, va_list *args)
+{
+  /* Passed as u32 through varargs, narrowed to u8 for the comparisons. */
+  u8 session_state = va_arg (*args, u32);
+  /* Expands to one "if (state == X) append name" statement per state. */
+#define _(n, str)                                                             \
+  if (session_state == SFDP_SESSION_STATE_##n)                                \
+    s = format (s, "%s", (str));
+  foreach_sfdp_session_state
+#undef _
+    return s;
+}
+
+/* Formats a session type.
+ * Variadic arguments: u32 session type, u32 parser index.
+ * Prints "ipv4", "ipv6", or "custom-parser: <name>" using the name of the
+ * parser found at parser index; nothing is appended for any other type. The
+ * parser index is only used for the user-defined type.
+ */
+u8 *
+format_sfdp_session_type (u8 *s, va_list *args)
+{
+  u32 session_type = va_arg (*args, u32);
+  u32 parser_index = va_arg (*args, u32);
+
+  if (session_type == SFDP_SESSION_TYPE_IP4)
+    return format (s, "ipv4");
+
+  if (session_type == SFDP_SESSION_TYPE_IP6)
+    return format (s, "ipv6");
+
+  if (session_type == SFDP_SESSION_TYPE_USER)
+    {
+      sfdp_parser_data_t *parser =
+        vec_elt_at_index (sfdp_parser_main.parsers, parser_index);
+      const char *parser_name = parser->name;
+
+      return format (s, "custom-parser: %s", parser_name);
+    }
+
+  return s;
+}
+
+/* Formats the context id of an IPv4 session key.
+ * Variadic argument: sfdp_session_ip4_key_t *. */
+u8 *
+format_sfdp_ipv4_context_id (u8 *s, va_list *args)
+{
+  sfdp_session_ip4_key_t *key = va_arg (*args, sfdp_session_ip4_key_t *);
+
+  return format (s, "%d", key->context_id);
+}
+
+/* Formats the "lo" address and port of an IPv4 session key as addr:port.
+ * Variadic argument: sfdp_session_ip4_key_t *. */
+u8 *
+format_sfdp_ipv4_ingress (u8 *s, va_list *args)
+{
+  sfdp_session_ip4_key_t *key = va_arg (*args, sfdp_session_ip4_key_t *);
+
+  return format (s, "%U:%u", format_ip4_address, &key->ip4_key.ip_addr_lo,
+                 key->ip4_key.port_lo);
+}
+
+/* Formats the "hi" address and port of an IPv4 session key as addr:port.
+ * Variadic argument: sfdp_session_ip4_key_t *. */
+u8 *
+format_sfdp_ipv4_egress (u8 *s, va_list *args)
+{
+  sfdp_session_ip4_key_t *key = va_arg (*args, sfdp_session_ip4_key_t *);
+
+  return format (s, "%U:%u", format_ip4_address, &key->ip4_key.ip_addr_hi,
+                 key->ip4_key.port_hi);
+}
+
+/* Formats the context id of an IPv6 session key.
+ * Variadic argument: sfdp_session_ip6_key_t *. */
+u8 *
+format_sfdp_ipv6_context_id (u8 *s, va_list *args)
+{
+  sfdp_session_ip6_key_t *key = va_arg (*args, sfdp_session_ip6_key_t *);
+
+  return format (s, "%d", key->context_id);
+}
+
+/* Formats the "lo" address and port of an IPv6 session key as addr:port.
+ * Variadic argument: sfdp_session_ip6_key_t *. */
+u8 *
+format_sfdp_ipv6_ingress (u8 *s, va_list *args)
+{
+  sfdp_session_ip6_key_t *key = va_arg (*args, sfdp_session_ip6_key_t *);
+
+  return format (s, "%U:%u", format_ip6_address, &key->ip6_key.ip6_addr_lo,
+                 key->ip6_key.port_lo);
+}
+
+/* Formats the "hi" address and port of an IPv6 session key as addr:port.
+ * Variadic argument: sfdp_session_ip6_key_t *. */
+u8 *
+format_sfdp_ipv6_egress (u8 *s, va_list *args)
+{
+  sfdp_session_ip6_key_t *key = va_arg (*args, sfdp_session_ip6_key_t *);
+
+  return format (s, "%U:%u", format_ip6_address, &key->ip6_key.ip6_addr_hi,
+                 key->ip6_key.port_hi);
+}
+
+/* Adds the 11 column headers of the session table to session_table.
+ * The column order matches the cell indices (0 to 10) written by
+ * sfdp_table_format_insert_session (). */
+void
+sfdp_table_format_add_header_col (table_t *session_table)
+{
+  table_add_header_col (session_table, 11, "id", "tenant", "thread", "index",
+                       "type", "proto", "context", "ingress", "egress",
+                       "state", "TTL(s)");
+}
+
+/* Writes one session into the session table, starting at row n.
+ *
+ * t:             table to fill (columns as set up by
+ *                sfdp_table_format_add_header_col ()).
+ * n:             first row to write.
+ * session_index: index of the session, printed in the "index" column.
+ * session:       session to print.
+ * tenant_id:     id printed in the "tenant" column.
+ * now:           current time, passed to the expiry module to compute the
+ *                remaining time.
+ *
+ * Returns the next free row: n + 1, or n + 2 when the session also has a
+ * valid secondary key (the secondary key gets a row of its own with only the
+ * context, ingress and egress columns filled).
+ */
+u32
+sfdp_table_format_insert_session (table_t *t, u32 n, u32 session_index,
+                                 sfdp_session_t *session, u32 tenant_id,
+                                 f64 now)
+{
+  /* Converted to network byte order so that format_hex_bytes prints the
+   * most significant byte first. */
+  u64 session_net = clib_host_to_net_u64 (session->session_id);
+  sfdp_session_ip46_key_t skey = {};
+  /* Scratch buffer for the normalized key of user-defined parsers. */
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kdata[SFDP_PARSER_MAX_KEY_SIZE];
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+  sfdp_parser_data_t *parser;
+  /* Session id */
+  table_format_cell (t, n, 0, "0x%U", format_hex_bytes, &session_net,
+                    sizeof (session_net));
+  /* Tenant id */
+  table_format_cell (t, n, 1, "%d", tenant_id);
+  /* Owning thread */
+  table_format_cell (t, n, 2, "%d", session->owning_thread_index);
+  /* Session index */
+  table_format_cell (t, n, 3, "%d", session_index);
+  /* Session type */
+  table_format_cell (t, n, 4, "%U", format_sfdp_session_type, session->type,
+                    session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+  /* Protocol */
+  table_format_cell (t, n, 5, "%U", format_ip_protocol, session->proto);
+  /* Session state */
+  table_format_cell (t, n, 9, "%U", format_sfdp_session_state, session->state);
+  /* Remaining time */
+  table_format_cell (
+    t, n, 10, "%f",
+    sfdp_main.expiry_callbacks.session_remaining_time (session, now));
+
+  /* Primary key: context, ingress and egress columns of the first row.
+   * These cells are left untouched when no primary key flag is set. */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    {
+      sfdp_normalise_ip4_key (session, &skey.key4, SFDP_SESSION_KEY_PRIMARY);
+      table_format_cell (t, n, 6, "%U", format_sfdp_ipv4_context_id,
+                        &skey.key4);
+      table_format_cell (t, n, 7, "%U", format_sfdp_ipv4_ingress, &skey.key4);
+      table_format_cell (t, n, 8, "%U", format_sfdp_ipv4_egress, &skey.key4);
+    }
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    {
+      sfdp_normalise_ip6_key (session, &skey.key6, SFDP_SESSION_KEY_PRIMARY);
+      table_format_cell (t, n, 6, "%U", format_sfdp_ipv6_context_id,
+                        &skey.key6);
+      table_format_cell (t, n, 7, "%U", format_sfdp_ipv6_ingress, &skey.key6);
+      table_format_cell (t, n, 8, "%U", format_sfdp_ipv6_egress, &skey.key6);
+    }
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    {
+      /* User-defined key: the parser normalizes and formats it. */
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+      parser->normalize_key_fn (session, kdata, SFDP_SESSION_KEY_PRIMARY);
+      table_format_cell (
+       t, n, 6, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_CONTEXT],
+       kdata);
+      table_format_cell (
+       t, n, 7, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_INGRESS],
+       kdata);
+      table_format_cell (t, n, 8, "%U",
+                        parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_EGRESS],
+                        kdata);
+    }
+  /* The first row is always consumed, with or without a primary key. */
+  n += 1;
+  /* Secondary key, if any: a second row with only the key columns. */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4)
+    {
+      sfdp_normalise_ip4_key (session, &skey.key4, SFDP_SESSION_KEY_SECONDARY);
+      table_format_cell (t, n, 6, "%U", format_sfdp_ipv4_context_id,
+                        &skey.key4);
+      table_format_cell (t, n, 7, "%U", format_sfdp_ipv4_ingress, &skey.key4);
+      table_format_cell (t, n, 8, "%U", format_sfdp_ipv4_egress, &skey.key4);
+      n += 1;
+    }
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6)
+    {
+      sfdp_normalise_ip6_key (session, &skey.key6, SFDP_SESSION_KEY_SECONDARY);
+      table_format_cell (t, n, 6, "%U", format_sfdp_ipv6_context_id,
+                        &skey.key6);
+      table_format_cell (t, n, 7, "%U", format_sfdp_ipv6_ingress, &skey.key6);
+      table_format_cell (t, n, 8, "%U", format_sfdp_ipv6_egress, &skey.key6);
+      n += 1;
+    }
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_SECONDARY]);
+      parser->normalize_key_fn (session, kdata, SFDP_SESSION_KEY_SECONDARY);
+      table_format_cell (
+       t, n, 6, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_CONTEXT],
+       kdata);
+      table_format_cell (
+       t, n, 7, "%U", parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_INGRESS],
+       kdata);
+      table_format_cell (t, n, 8, "%U",
+                        parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_EGRESS],
+                        kdata);
+      n += 1;
+    }
+  return n;
+}
+
+/* Formats a scope as its name.
+ * Variadic argument: u32 scope index, used to index
+ * sfdp_service_main.scope_names. */
+u8 *
+format_sfdp_scope (u8 *s, va_list *args)
+{
+  sfdp_service_main_t *service_main = &sfdp_service_main;
+  u32 scope_index = va_arg (*args, u32);
+
+  s = format (s, "%s", service_main->scope_names[scope_index]);
+  return s;
+}
+
+/* Formats a service bitmap as a list of service node names.
+ * Variadic arguments: u32 scope index, sfdp_bitmap_t bitmap.
+ * Every service registered in the scope whose mask intersects the bitmap is
+ * printed as "<node name>," (each name is followed by a comma).
+ */
+u8 *
+format_sfdp_bitmap (u8 *s, va_list *args)
+{
+  u32 scope_index = va_arg (*args, u32);
+  sfdp_bitmap_t bitmap = va_arg (*args, sfdp_bitmap_t);
+  sfdp_service_main_t *service_main = &sfdp_service_main;
+  sfdp_service_registration_t **scope_services =
+    vec_elt_at_index (service_main->services_per_scope_index, scope_index)[0];
+  sfdp_service_registration_t **service;
+
+  vec_foreach (service, scope_services)
+    {
+      if (bitmap & service[0]->service_mask[0])
+        s = format (s, "%s,", service[0]->node_name);
+    }
+
+  return s;
+}
+
+/* Format callback taking (u32 session_index, f64 now): appends a multi-line
+ * description of the session: ids, owning thread, primary key, state, the
+ * expiry module's details, both service chains and both flow counters.
+ * The secondary key is not printed (see TODO below). */
+u8 *
+format_sfdp_session_detail (u8 *s, va_list *args)
+{
+  u32 session_index = va_arg (*args, u32);
+  f64 now = va_arg (*args, f64);
+  sfdp_session_t *session = sfdp_session_at_index (session_index);
+  u32 scope_index = session->scope_index;
+
+  u64 session_net = clib_host_to_net_u64 (session->session_id);
+  vlib_counter_t fctr, bctr;
+  uword thread_index = session->owning_thread_index;
+  sfdp_session_ip46_key_t skey = {};
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kdata[SFDP_PARSER_MAX_KEY_SIZE];
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+  sfdp_parser_data_t *parser = 0;
+
+  /* Counter slot (session_index << 1) is reported as the forward flow,
+   * slot (session_index << 1) | 1 as the reverse flow. */
+  vlib_get_combined_counter (
+    &sfdp_main.per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP], session_index << 1,
+    &fctr);
+  vlib_get_combined_counter (
+    &sfdp_main.per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+    (session_index << 1) | 0x1, &bctr);
+  /* TODO: deal with secondary keys */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    sfdp_normalise_ip4_key (session, &skey.key4, SFDP_SESSION_KEY_PRIMARY);
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    sfdp_normalise_ip6_key (session, &skey.key6, SFDP_SESSION_KEY_PRIMARY);
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+      parser->normalize_key_fn (session, kdata, SFDP_SESSION_KEY_PRIMARY);
+    }
+
+  s = format (s, "  session id: 0x%U\n", format_hex_bytes, &session_net,
+             sizeof (u64));
+  /* %d consumes an int: cast explicitly instead of passing a uword */
+  s = format (s, "  thread index: %d\n",
+             (thread_index == SFDP_UNBOUND_THREAD_INDEX) ?
+               -1 :
+               (int) thread_index);
+  s = format (s, "  session index: %d\n", session_index);
+
+  /* The ingress/egress formatters consume a single key pointer each, so the
+   * format string must hold exactly one %U per formatter: a stray %u would
+   * swallow the next formatter's function pointer. */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    s = format (s, "  specification: %U\t%U\t-> %U\n", format_ip_protocol,
+               session->proto, format_sfdp_ipv4_ingress, &skey.key4,
+               format_sfdp_ipv4_egress, &skey.key4);
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    s = format (s, "  specification: %U\t%U\t-> %U\n", format_ip_protocol,
+               session->proto, format_sfdp_ipv6_ingress, &skey.key6,
+               format_sfdp_ipv6_egress, &skey.key6);
+  else if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    s = format (s, "  specification: %U\t%U\t-> %U\n", format_ip_protocol,
+               session->proto,
+               parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_INGRESS], kdata,
+               parser->format_fn[SFDP_PARSER_FORMAT_FUNCTION_EGRESS], kdata);
+
+  s = format (s, "  state: %U\n", format_sfdp_session_state, session->state);
+  s = format (s, "  %U\n", sfdp_main.expiry_callbacks.format_session_details,
+             session, now);
+  s = format (s, "  forward service chain: %U\n", format_sfdp_bitmap,
+             scope_index, session->bitmaps[SFDP_FLOW_FORWARD]);
+  s = format (s, "  reverse service chain: %U\n", format_sfdp_bitmap,
+             scope_index, session->bitmaps[SFDP_FLOW_REVERSE]);
+  s = format (s, "  counters:\n");
+  s = format (s, "    forward flow:\n");
+  s = format (s, "      bytes: %llu\n", fctr.bytes);
+  s = format (s, "      packets: %llu\n", fctr.packets);
+  s = format (s, "    reverse flow:\n");
+  s = format (s, "      bytes: %llu\n", bctr.bytes);
+  s = format (s, "      packets: %llu\n", bctr.packets);
+  return s;
+}
+
+/* Format callback taking (sfdp_main_t *, u32 tenant_idx, sfdp_tenant_t *):
+ * appends the tenant index and context id, then, for every scope index, the
+ * scope name followed by the tenant's forward and reverse service chains. */
+u8 *
+format_sfdp_tenant (u8 *s, va_list *args)
+{
+  u32 indent = format_get_indent (s);
+  __clib_unused sfdp_main_t *sfdp = va_arg (*args, sfdp_main_t *);
+  u32 tenant_idx = va_arg (*args, u32);
+  sfdp_tenant_t *tenant = va_arg (*args, sfdp_tenant_t *);
+  u32 scope_index;
+
+  s = format (s, "index: %d\n%Ucontext: %d\n", tenant_idx, format_white_space,
+             indent, tenant->context_id);
+
+  foreach_sfdp_scope_index (scope_index)
+  {
+    s = format (s, "%Uscope: %U\n", format_white_space, indent,
+               format_sfdp_scope, scope_index);
+    /* chain heading at indent + 2, chain contents at indent + 4 */
+    s = format (s, "%Uforward service chain:\n%U%U\n", format_white_space,
+               indent + 2, format_white_space, indent + 4,
+               format_sfdp_bitmap, scope_index,
+               tenant->bitmaps[SFDP_FLOW_FORWARD]);
+    s = format (s, "%Ureverse service chain:\n%U%U\n", format_white_space,
+               indent + 2, format_white_space, indent + 4,
+               format_sfdp_bitmap, scope_index,
+               tenant->bitmaps[SFDP_FLOW_REVERSE]);
+  }
+  return s;
+}
+
+u8 *
+format_sfdp_tenant_extra (u8 *s, va_list *args)
+{
+  u32 indent = format_get_indent (s);
+  sfdp_main_t *sfdp = va_arg (*args, sfdp_main_t *);
+  vlib_main_t *vm = vlib_get_main ();
+  u32 tenant_idx = va_arg (*args, u32);
+  __clib_unused sfdp_tenant_t *tenant = va_arg (*args, sfdp_tenant_t *);
+  sfdp_timeout_t *timeout;
+  counter_t ctr;
+  vlib_counter_t ctr2;
+  s = format (s, "%s\n", "Counters:");
+
+#define _(x, y, z)                                                            \
+  ctr = vlib_get_simple_counter (                                             \
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x], tenant_idx);  \
+  s = format (s, "%U%s: %llu\n", format_white_space, indent + 2, z, ctr);
+  foreach_sfdp_tenant_session_counter
+#undef _
+#define _(x, y, z)                                                            \
+  vlib_get_combined_counter (                                                 \
+    &sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x], tenant_idx, &ctr2); \
+  s = format (s, "%U%s: %llu packets\n", format_white_space, indent + 2, z,   \
+             ctr2.packets);                                                  \
+  s = format (s, "%U  %llu bytes\n", format_white_space,                      \
+             indent + strlen (z) + 2, ctr2.bytes);
+    foreach_sfdp_tenant_data_counter
+#undef _
+      s = format (s, "%U%s\n", format_white_space, indent,
+                 "Configured Timeout:");
+
+  sfdp_foreach_timeout (sfdp, timeout)
+  {
+    u32 idx = timeout - sfdp->timeouts;
+    if ((timeout->name != NULL) && strlen (timeout->name))
+      {
+       s = format (s, "%U%s: %d seconds\n", format_white_space, indent + 2,
+                   timeout->name, tenant->timeouts[idx]);
+      }
+  }
+
+  s = format (s, "%U%s\n", format_white_space, indent,
+             "Configured Slowpath nodes:");
+#define _(sym, default, name)                                                 \
+  s = format (s, "%U%s: %U\n", format_white_space, indent + 2, name,          \
+             format_vlib_node_name, vm,                                      \
+             tenant->sp_node_indices[SFDP_SP_NODE_##sym]);
+  foreach_sfdp_sp_node
+#undef _
+    return s;
+}
+
+u8 *
+format_sfdp_sp_node (u8 *s, va_list *args)
+{
+  u32 sp_index = va_arg (*args, u32);
+#define _(sym, default, name)                                                 \
+  if (sp_index == SFDP_SP_NODE_##sym)                                         \
+    s = format (s, name);
+  foreach_sfdp_sp_node
+#undef _
+    return s;
+}
+
+/* Unformat callback: tries every registered service of every scope, in
+ * order, and on the first node name that matches the input stores that
+ * service's index_in_bitmap[0] into the u32 result. Returns 1 on match,
+ * 0 otherwise. */
+uword
+unformat_sfdp_service (unformat_input_t *input, va_list *args)
+{
+  sfdp_service_main_t *sm = &sfdp_service_main;
+  u32 *result = va_arg (*args, u32 *);
+  u32 scope;
+
+  for (scope = 0; scope < sm->n_scopes; scope++)
+    {
+      int k = 0;
+
+      while (k < vec_len (sm->services_per_scope_index[scope]))
+       {
+         sfdp_service_registration_t *reg =
+           *vec_elt_at_index (sm->services_per_scope_index[scope], k);
+
+         if (unformat (input, reg->node_name))
+           {
+             result[0] = reg->index_in_bitmap[0];
+             return 1;
+           }
+         k++;
+       }
+    }
+  return 0;
+}
+
+/* Unformat callback: consumes consecutive service names and ORs the bit of
+ * each one into a bitmap. Writes the bitmap to the sfdp_bitmap_t result and
+ * returns 1 when at least one name matched; otherwise returns 0 and leaves
+ * the result untouched. */
+uword
+unformat_sfdp_service_bitmap (unformat_input_t *input, va_list *args)
+{
+  sfdp_bitmap_t *result = va_arg (*args, sfdp_bitmap_t *);
+  sfdp_bitmap_t acc = 0;
+  int bit = -1;
+
+  while (unformat_user (input, unformat_sfdp_service, &bit))
+    acc |= 1ULL << bit;
+
+  /* bit keeps its -1 sentinel when no service name was parsed */
+  if (bit <= -1)
+    return 0;
+
+  *result = acc;
+  return 1;
+}
+
+/* Unformat callback: stores into the u32 result the index of the first
+ * entry of sfdp_service_main.scope_names matching the input.
+ * Returns 1 on match, 0 otherwise. */
+uword
+unformat_sfdp_scope_name (unformat_input_t *input, va_list *args)
+{
+  u32 *result = va_arg (*args, u32 *);
+  sfdp_service_main_t *sm = &sfdp_service_main;
+  u32 idx = 0;
+
+  while (idx < sm->n_scopes)
+    {
+      if (unformat (input, sm->scope_names[idx]))
+       {
+         result[0] = idx;
+         return 1;
+       }
+      idx++;
+    }
+
+  return 0;
+}
+
+uword
+unformat_sfdp_sp_node (unformat_input_t *input, va_list *args)
+{
+  u32 *result = va_arg (*args, u32 *);
+#define _(sym, default, str)                                                  \
+  if (unformat (input, str))                                                  \
+    {                                                                         \
+      *result = SFDP_SP_NODE_##sym;                                           \
+      return 1;                                                               \
+    }
+  foreach_sfdp_sp_node
+#undef _
+    return 0;
+}
+
+/* Unformat callback: walks the timeouts of sfdp_main and stores into the u32
+ * result the position (within sfdp->timeouts) of the first one whose
+ * non-empty name matches the input. Returns 1 on match, 0 otherwise. */
+uword
+unformat_sfdp_timeout_name (unformat_input_t *input, va_list *args)
+{
+  u32 *result = va_arg (*args, u32 *);
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_timeout_t *timeout;
+
+  sfdp_foreach_timeout (sfdp, timeout)
+  {
+    /* slots without a name (NULL or empty string) can never match */
+    if (timeout->name != NULL && strlen (timeout->name) != 0)
+      {
+       if (unformat (input, timeout->name))
+         {
+           result[0] = timeout - sfdp->timeouts;
+           return 1;
+         }
+      }
+  }
+  return 0;
+}
diff --git a/src/vnet/sfdp/lookup/full_reass_node.c b/src/vnet/sfdp/lookup/full_reass_node.c
new file mode 100644 (file)
index 0000000..bbd3e20
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/reass.h>
+
+typedef struct /* trace record of the sp-full-reass nodes; it has no fields */
+{
+} sfdp_lookup_sp_full_reass_trace_t;
+
+/* Trace formatter taking (vlib_main_t *, vlib_node_t *, trace record):
+ * appends the node name followed by a fixed message; the record itself is
+ * not read. */
+static u8 *
+format_sfdp_lookup_sp_full_reass_trace (u8 *s, va_list *args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  vlib_node_t *node = va_arg (*args, vlib_node_t *);
+  CLIB_UNUSED (sfdp_lookup_sp_full_reass_trace_t * t) =
+    va_arg (*args, sfdp_lookup_sp_full_reass_trace_t *);
+
+  return format (s, "%v: sent to full reass node", node->name);
+}
+
+#define foreach_sfdp_lookup_sp_full_reass_next                                \
+  _ (IP4, "ip4-full-reassembly-custom-context")                               \
+  _ (IP6, "ip6-full-reassembly-custom-context")
+
+enum /* one SFDP_LOOKUP_SP_FULL_REASS_NEXT_<sym> per entry of the list above,
+      * followed by the number of entries */
+{
+#define _(sym, str) SFDP_LOOKUP_SP_FULL_REASS_NEXT_##sym,
+  foreach_sfdp_lookup_sp_full_reass_next
+#undef _
+    SFDP_LOOKUP_SP_FULL_REASS_N_NEXT
+};
+
+#define foreach_sfdp_lookup_sp_full_reass_error _ (NOERROR, "No error")
+
+typedef enum
+{
+#define _(sym, str) SFDP_LOOKUP_SP_FULL_REASS_ERROR_##sym,
+  SFDP_LOOKUP_SP_FULL_REASS_N_ERROR
+#undef _
+} sfdp_lookup_sp_full_reass_error_t;
+
+static char *sfdp_lookup_sp_full_reass_error_strings[] = { /* description
+   * string of each foreach_sfdp_lookup_sp_full_reass_error entry, in order */
+#define _(sym, str) str,
+  foreach_sfdp_lookup_sp_full_reass_error
+#undef _
+};
+
+/* Shared body of the ip4/ip6 sp-full-reass nodes. For every buffer of the
+ * frame: records flow_id as aux data, adds a trace record when the buffer is
+ * traced, copies the tenant index into the sfdp_buffer2 area, sets the
+ * FULL_REASSEMBLED flag there, and stores the reassembly next / error-next
+ * indices taken from sfdp_reass_main. The whole frame is then enqueued,
+ * together with the aux data, to the single IP4 or IP6 next node.
+ * Returns the number of buffers in the frame. */
+static_always_inline u32
+sfdp_lookup_sp_full_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                                 vlib_frame_t *frame, bool is_ip6)
+{
+  sfdp_reass_main_t *vrm = &sfdp_reass_main;
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_pkts = frame->n_vectors;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE];
+  u32 aux_data[VLIB_FRAME_SIZE];
+  u32 i;
+
+  vlib_get_buffers (vm, from, bufs, n_pkts);
+
+  // TODO: prefetch + 4-loop
+  for (i = 0; i < n_pkts; i++)
+    {
+      vlib_buffer_t *buf = bufs[i];
+
+      aux_data[i] = buf->flow_id;
+      if (PREDICT_FALSE (buf->flags & VLIB_BUFFER_IS_TRACED))
+       {
+         sfdp_lookup_sp_full_reass_trace_t *t =
+           vlib_add_trace (vm, node, buf, sizeof (*t));
+       }
+
+      /* Save the tenant index */
+      sfdp_buffer2 (buf)->tenant_index = sfdp_buffer (buf)->tenant_index;
+      sfdp_buffer2 (buf)->flags = SFDP_BUFFER_FLAG_FULL_REASSEMBLED;
+
+      if (is_ip6)
+       {
+         vnet_buffer (buf)->ip.reass.next_index =
+           vrm->ip6_full_reass_next_index;
+         vnet_buffer (buf)->ip.reass.error_next_index =
+           vrm->ip6_full_reass_err_next_index;
+       }
+      else
+       {
+         vnet_buffer (buf)->ip.reass.next_index =
+           vrm->ip4_full_reass_next_index;
+         vnet_buffer (buf)->ip.reass.error_next_index =
+           vrm->ip4_full_reass_err_next_index;
+       }
+    }
+
+  if (is_ip6)
+    vlib_buffer_enqueue_to_single_next_with_aux (
+      vm, node, from, aux_data, SFDP_LOOKUP_SP_FULL_REASS_NEXT_IP6,
+      frame->n_vectors);
+  else
+    vlib_buffer_enqueue_to_single_next_with_aux (
+      vm, node, from, aux_data, SFDP_LOOKUP_SP_FULL_REASS_NEXT_IP4,
+      frame->n_vectors);
+
+  return frame->n_vectors;
+}
+
+/* IPv4 entry point: runs the shared inline body with is_ip6 cleared. */
+VLIB_NODE_FN (sfdp_lookup_ip4_sp_full_reass)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const bool is_ip6 = false;
+
+  return sfdp_lookup_sp_full_reass_inline (vm, node, frame, is_ip6);
+}
+
+/* Registration of the IPv4 sp-full-reass node; its next nodes are exactly
+ * the entries of foreach_sfdp_lookup_sp_full_reass_next. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip4_sp_full_reass) = {
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .name = "sfdp-lookup-ip4-sp-full-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_sp_full_reass_trace,
+
+  .error_strings = sfdp_lookup_sp_full_reass_error_strings,
+  .n_errors = ARRAY_LEN (sfdp_lookup_sp_full_reass_error_strings),
+
+  .n_next_nodes = SFDP_LOOKUP_SP_FULL_REASS_N_NEXT,
+  .next_nodes = {
+#define _(sym, str) [SFDP_LOOKUP_SP_FULL_REASS_NEXT_##sym] = str,
+  foreach_sfdp_lookup_sp_full_reass_next
+#undef _
+  },
+};
+
+/* IPv6 entry point: runs the shared inline body with is_ip6 set. */
+VLIB_NODE_FN (sfdp_lookup_ip6_sp_full_reass)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const bool is_ip6 = true;
+
+  return sfdp_lookup_sp_full_reass_inline (vm, node, frame, is_ip6);
+}
+
+/* Registration of the IPv6 sp-full-reass node; its next nodes are exactly
+ * the entries of foreach_sfdp_lookup_sp_full_reass_next. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip6_sp_full_reass) = {
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .name = "sfdp-lookup-ip6-sp-full-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_sp_full_reass_trace,
+
+  .error_strings = sfdp_lookup_sp_full_reass_error_strings,
+  .n_errors = ARRAY_LEN (sfdp_lookup_sp_full_reass_error_strings),
+
+  .n_next_nodes = SFDP_LOOKUP_SP_FULL_REASS_N_NEXT,
+  .next_nodes = {
+#define _(sym, str) [SFDP_LOOKUP_SP_FULL_REASS_NEXT_##sym] = str,
+  foreach_sfdp_lookup_sp_full_reass_next
+#undef _
+  },
+};
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/icmp_error_node.c b/src/vnet/sfdp/lookup/icmp_error_node.c
new file mode 100644 (file)
index 0000000..1e0e151
--- /dev/null
@@ -0,0 +1,398 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/error.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/service.h>
+#include <vnet/sfdp/sfdp_funcs.h>
+#include "lookup_inlines.h"
+
+#define foreach_sfdp_lookup_icmp_error                                        \
+  _ (NO_SESSION, "no session")                                                \
+  _ (INVALID_INNER_PKT, "invalid inner packet")
+
+typedef enum /* one SFDP_LOOKUP_ICMP_ERROR_<sym> per entry of the list above,
+              * followed by the number of entries */
+{
+#define _(sym, str) SFDP_LOOKUP_ICMP_ERROR_##sym,
+  foreach_sfdp_lookup_icmp_error
+#undef _
+    SFDP_LOOKUP_ICMP_ERROR_N_ERROR,
+} sfdp_icmp_lookup_error_t;
+
+static char *sfdp_lookup_icmp_error_strings[] = { /* description string of
+   * each foreach_sfdp_lookup_icmp_error entry, in enum order */
+#define _(sym, string) string,
+  foreach_sfdp_lookup_icmp_error
+#undef _
+};
+
+#define foreach_sfdp_lookup_icmp_next _ (DROP, "error-drop")
+
+typedef enum /* statically registered next nodes of the icmp lookup nodes:
+              * only DROP, followed by the entry count */
+{
+#define _(a, b) SFDP_LOOKUP_ICMP_NEXT_##a,
+  foreach_sfdp_lookup_icmp_next
+#undef _
+    SFDP_LOOKUP_ICMP_N_NEXT
+} sfdp_lookup_icmp_next_t;
+
+typedef struct /* trace record of the icmp lookup nodes; it has no fields */
+{
+
+} sfdp_lookup_icmp_trace_t;
+
+/* Trace formatter taking (vlib_main_t *, vlib_node_t *, trace record):
+ * appends the node name followed by ':'; the record itself is not read. */
+static u8 *
+format_sfdp_lookup_icmp_trace (u8 *s, va_list *args)
+{
+  vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t *node = va_arg (*args, vlib_node_t *);
+  sfdp_lookup_icmp_trace_t __clib_unused *t =
+    va_arg (*args, sfdp_lookup_icmp_trace_t *);
+
+  return format (s, "%v:", node->name);
+}
+
+/* Shared body of the ip4/ip6 ICMP lookup nodes.
+ *
+ * Pass 1: for every buffer, current_data is temporarily moved to
+ *   l4_hdr_offset + 8 (ICMP header + unused field), the session key, lookup
+ *   value and hash are computed with sfdp_calc_key_v4/v6, and current_data is
+ *   restored.
+ * Pass 2: every key is searched in table4/table6. A buffer is
+ *   - dropped and counted as INVALID_INNER_PKT when its lookup value has
+ *     SFDP_LV_TO_SP set,
+ *   - dropped and counted as NO_SESSION when the search misses,
+ *   - queued for handoff when the flow belongs to another thread,
+ *   - otherwise given the flow id of the opposite direction and sent to the
+ *     tenant's icmp4/icmp6 lookup next node.
+ * Pass 3: locally handled buffers that matched a session get their tenant
+ *   index rewritten from the session, then local buffers are enqueued, the
+ *   handoff buffers are sent to their threads, and traces are added.
+ *
+ * Returns the number of buffers in the frame. */
+static_always_inline uword
+sfdp_lookup_icmp_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                        vlib_frame_t *frame, u8 is_ipv6)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 thread_index = vm->thread_index;
+
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left = frame->n_vectors;
+  u32 *bi = from;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs;
+  vlib_buffer_t *local_bufs[VLIB_FRAME_SIZE];
+  i16 current_data[VLIB_FRAME_SIZE], *cd = current_data;
+  SFDP_SESSION_IP46_KEYS_TYPE (VLIB_FRAME_SIZE) keys;
+  sfdp_session_ip4_key_t *k4 = keys.keys4;
+  sfdp_session_ip6_key_t *k6 = keys.keys6;
+  u64 lookup_vals[VLIB_FRAME_SIZE], *lv = lookup_vals;
+  u64 hashes[VLIB_FRAME_SIZE], *h = hashes;
+  i16 l4_hdr_off[VLIB_FRAME_SIZE], *l4o = l4_hdr_off;
+  u16 local_next_indices[VLIB_FRAME_SIZE], *lni = local_next_indices;
+  u32 local_buffer_indices[VLIB_FRAME_SIZE], *lbi = local_buffer_indices;
+  u32 handoff_buffer_indices[VLIB_FRAME_SIZE], *hbi = handoff_buffer_indices;
+  u16 handoff_thread_indices[VLIB_FRAME_SIZE], *hti = handoff_thread_indices;
+  bool local_has_session[VLIB_FRAME_SIZE], *lhs = local_has_session;
+
+  vlib_get_buffers (vm, from, bufs, n_left);
+
+  /* Pass 1: compute keys from the packet embedded in the ICMP error */
+  if (!is_ipv6)
+    {
+      while (n_left)
+       {
+         /* Advance the current buffer */
+         cd[0] = b[0]->current_data;
+         b[0]->current_data = vnet_buffer (b[0])->l4_hdr_offset +
+                              8 /* ICMP header + unused field */;
+         sfdp_calc_key_v4 (b[0], b[0]->flow_id, k4, lv, h, l4o, 1);
+         b[0]->current_data = cd[0];
+
+         cd += 1;
+         b += 1;
+         k4 += 1;
+         lv += 1;
+         h += 1;
+         l4o += 1;
+         n_left -= 1;
+       }
+    }
+  else
+    {
+      while (n_left)
+       {
+         /* Advance the current buffer */
+         cd[0] = b[0]->current_data;
+         b[0]->current_data = vnet_buffer (b[0])->l4_hdr_offset +
+                              8 /* ICMP header + unused field */;
+         sfdp_calc_key_v6 (b[0], b[0]->flow_id, k6, lv, h, l4o, 1);
+         b[0]->current_data = cd[0];
+
+         cd += 1;
+         b += 1;
+         k6 += 1;
+         lv += 1;
+         h += 1;
+         l4o += 1;
+         n_left -= 1;
+       }
+    }
+  /* Perform the lookup */
+  b = bufs;
+  bi = from;
+  k4 = keys.keys4;
+  k6 = keys.keys6;
+  lv = lookup_vals;
+  h = hashes;
+  l4o = l4_hdr_off;
+
+  n_left = frame->n_vectors;
+
+  if (!is_ipv6)
+    {
+      while (n_left)
+       {
+         uword flow_thread_index;
+         u16 tenant_index;
+         sfdp_tenant_t *tenant;
+         clib_bihash_kv_24_8_t kv4;
+
+         if (lv[0] & SFDP_LV_TO_SP)
+           {
+             vlib_node_increment_counter (
+               vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_INVALID_INNER_PKT,
+               1);
+             lbi[0] = bi[0];
+             lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP;
+             lhs[0] = false;
+
+             lbi += 1;
+             lni += 1;
+             lhs += 1;
+             goto next_pkt4;
+           }
+
+         clib_memcpy (&kv4.key, k4, 24);
+         if (clib_bihash_search_inline_with_hash_24_8 (&sfdp->table4, h[0],
+                                                       &kv4))
+           {
+             /* TODO: not drop? */
+             vlib_node_increment_counter (
+               vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_NO_SESSION, 1);
+             lbi[0] = bi[0];
+             lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP;
+             lhs[0] = false;
+
+             lbi += 1;
+             lni += 1;
+             lhs += 1;
+             goto next_pkt4;
+           }
+         else
+           {
+             lv[0] ^= kv4.value;
+           }
+
+         flow_thread_index = sfdp_thread_index_from_lookup (lv[0]);
+
+         if (thread_index != flow_thread_index)
+           {
+             hbi[0] = bi[0];
+             hti[0] = flow_thread_index;
+
+             hbi += 1;
+             hti += 1;
+             goto next_pkt4;
+           }
+         /* Flip last bit of flow index because the error goes into the
+          * opposite direction */
+         b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]) ^ 0x1;
+
+         tenant_index = sfdp_buffer (b[0])->tenant_index;
+         tenant = sfdp_tenant_at_index (sfdp, tenant_index);
+
+         lbi[0] = bi[0];
+         lni[0] = tenant->icmp4_lookup_next;
+         lhs[0] = true;
+
+         lbi += 1;
+         lni += 1;
+         lhs += 1;
+
+       next_pkt4:
+
+         b += 1;
+         bi += 1;
+         k4 += 1;
+         lv += 1;
+         h += 1;
+         l4o += 1;
+         n_left -= 1;
+       }
+    }
+  else
+    {
+      while (n_left)
+       {
+         uword flow_thread_index;
+         u16 tenant_index;
+         sfdp_tenant_t *tenant;
+         clib_bihash_kv_48_8_t kv6;
+
+         if (lv[0] & SFDP_LV_TO_SP)
+           {
+             vlib_node_increment_counter (
+               vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_INVALID_INNER_PKT,
+               1);
+             lbi[0] = bi[0];
+             lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP;
+             lhs[0] = false;
+
+             lbi += 1;
+             lni += 1;
+             lhs += 1;
+             goto next_pkt6;
+           }
+
+         clib_memcpy (&kv6.key, k6, 48);
+         if (clib_bihash_search_inline_with_hash_48_8 (&sfdp->table6, h[0],
+                                                       &kv6))
+           {
+             /* TODO: not drop? */
+             vlib_node_increment_counter (
+               vm, node->node_index, SFDP_LOOKUP_ICMP_ERROR_NO_SESSION, 1);
+             lbi[0] = bi[0];
+             lni[0] = SFDP_LOOKUP_ICMP_NEXT_DROP;
+             lhs[0] = false;
+
+             lbi += 1;
+             lni += 1;
+             lhs += 1;
+             goto next_pkt6;
+           }
+         else
+           {
+             lv[0] ^= kv6.value;
+           }
+
+         flow_thread_index = sfdp_thread_index_from_lookup (lv[0]);
+
+         if (thread_index != flow_thread_index)
+           {
+             hbi[0] = bi[0];
+             hti[0] = flow_thread_index;
+
+             hbi += 1;
+             hti += 1;
+             goto next_pkt6;
+           }
+         /* Flip last bit of flow index because the error goes into the
+          * opposite direction */
+         b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]) ^ 0x1;
+
+         tenant_index = sfdp_buffer (b[0])->tenant_index;
+         tenant = sfdp_tenant_at_index (sfdp, tenant_index);
+
+         lbi[0] = bi[0];
+         lni[0] = tenant->icmp6_lookup_next;
+         lhs[0] = true;
+
+         lbi += 1;
+         lni += 1;
+         lhs += 1;
+
+       next_pkt6:
+
+         b += 1;
+         bi += 1;
+         /* The IPv6 key cursor must advance here: stepping k4 instead left
+          * k6 on the first key, so every packet of the frame was searched
+          * with packet 0's key. */
+         k6 += 1;
+         lv += 1;
+         h += 1;
+         l4o += 1;
+         n_left -= 1;
+       }
+    }
+
+  if (lbi - local_buffer_indices)
+    {
+      uword n = lbi - local_buffer_indices;
+      uword n_left_local = n;
+      lbi = local_buffer_indices;
+      lhs = local_has_session;
+      vlib_get_buffers (vm, lbi, local_bufs, n);
+      b = local_bufs;
+      while (n_left_local)
+       {
+         sfdp_session_t *session;
+         /* only buffers that matched a session carry a usable flow_id */
+         if (lhs[0])
+           {
+             u32 session_idx = sfdp_session_from_flow_index (b[0]->flow_id);
+             session = sfdp_session_at_index (session_idx);
+             sfdp_buffer (b[0])->tenant_index = session->tenant_idx;
+           }
+         lbi += 1;
+         lhs += 1;
+         n_left_local -= 1;
+         b += 1;
+       }
+      vlib_buffer_enqueue_to_next (vm, node, local_buffer_indices,
+                                  local_next_indices, n);
+    }
+
+  if (hbi - handoff_buffer_indices)
+    vlib_buffer_enqueue_to_thread (
+      vm, node,
+      is_ipv6 ? sfdp->icmp6_error_frame_queue_index :
+               sfdp->icmp4_error_frame_queue_index,
+      handoff_buffer_indices, handoff_thread_indices,
+      hbi - handoff_buffer_indices, 1);
+
+  if (node->flags & VLIB_NODE_FLAG_TRACE)
+    {
+      n_left = frame->n_vectors;
+      b = bufs;
+      while (n_left)
+       {
+         if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+           {
+             sfdp_lookup_icmp_trace_t *t =
+               vlib_add_trace (vm, node, b[0], sizeof (*t));
+           }
+         b += 1;
+         n_left -= 1;
+       }
+    }
+
+  return frame->n_vectors;
+}
+
+/* IPv4 entry point: runs the shared inline body with is_ipv6 cleared. */
+VLIB_NODE_FN (sfdp_lookup_ip4_icmp_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const u8 is_ipv6 = 0;
+
+  return sfdp_lookup_icmp_inline (vm, node, frame, is_ipv6);
+}
+
+/* IPv6 entry point: runs the shared inline body with is_ipv6 set. */
+VLIB_NODE_FN (sfdp_lookup_ip6_icmp_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const u8 is_ipv6 = 1;
+
+  return sfdp_lookup_icmp_inline (vm, node, frame, is_ipv6);
+}
+
+/* Registration of the IPv4 ICMP lookup node; the only statically declared
+ * next node is the drop node from foreach_sfdp_lookup_icmp_next. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip4_icmp_node) = {
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .name = "sfdp-lookup-ip4-icmp",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_icmp_trace,
+
+  .error_strings = sfdp_lookup_icmp_error_strings,
+  .n_errors = ARRAY_LEN (sfdp_lookup_icmp_error_strings),
+
+  .n_next_nodes = SFDP_LOOKUP_ICMP_N_NEXT,
+  .next_nodes = {
+#define _(a, b) [SFDP_LOOKUP_ICMP_NEXT_##a] = (b),
+          foreach_sfdp_lookup_icmp_next
+#undef _
+  }
+};
+
+/* Registration of the IPv6 ICMP lookup node; the only statically declared
+ * next node is the drop node from foreach_sfdp_lookup_icmp_next. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip6_icmp_node) = {
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .name = "sfdp-lookup-ip6-icmp",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_icmp_trace,
+
+  .error_strings = sfdp_lookup_icmp_error_strings,
+  .n_errors = ARRAY_LEN (sfdp_lookup_icmp_error_strings),
+
+  .n_next_nodes = SFDP_LOOKUP_ICMP_N_NEXT,
+  .next_nodes = {
+#define _(a, b) [SFDP_LOOKUP_ICMP_NEXT_##a] = (b),
+          foreach_sfdp_lookup_icmp_next
+#undef _
+  }
+};
diff --git a/src/vnet/sfdp/lookup/lookup.h b/src/vnet/sfdp/lookup/lookup.h
new file mode 100644 (file)
index 0000000..4f1298b
--- /dev/null
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_h__
+#define __included_lookup_h__
+
+#define SFDP_LV_TO_SP ((u64) 0x1 << 63) /* bit 63 of a 64-bit lookup value;
+  NOTE(review): presumably "send to slow path" - confirm against users */
+#define foreach_sfdp_lookup_error                                             \
+  _ (MISS, "flow miss")                                                       \
+  _ (LOCAL, "local flow")                                                     \
+  _ (REMOTE, "remote flow")                                                   \
+  _ (COLLISION, "hash add collision")                                         \
+  _ (CON_DROP, "handoff drop")                                                \
+  _ (TABLE_OVERFLOW, "table overflow")
+
+typedef enum /* one SFDP_LOOKUP_ERROR_<sym> per entry of the list above,
+              * followed by the number of entries */
+{
+#define _(sym, str) SFDP_LOOKUP_ERROR_##sym,
+  foreach_sfdp_lookup_error
+#undef _
+    SFDP_LOOKUP_N_ERROR,
+} sfdp_lookup_error_t;
+__clib_unused static char *sfdp_lookup_error_strings[] = { /* description
+   * string of each foreach_sfdp_lookup_error entry, in enum order */
+#define _(sym, string) string,
+  foreach_sfdp_lookup_error
+#undef _
+};
+
+#endif
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/lookup_common.h b/src/vnet/sfdp/lookup/lookup_common.h
new file mode 100644 (file)
index 0000000..29e5be3
--- /dev/null
@@ -0,0 +1,171 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_common_h__
+#define __included_lookup_common_h__
+#include <vlib/vlib.h>
+#include <vnet/ip/ip.h>
+/* u32x4_insert (v, x, i): copy of v with lane i replaced by x. Uses the
+ * SSE4.1 intrinsic when available, a plain lane assignment otherwise. */
+#ifdef __SSE4_1__
+#define u32x4_insert(v, x, i) (u32x4) _mm_insert_epi32 ((__m128i) (v), x, i)
+#else
+static_always_inline u32x4
+u32x4_insert (u32x4 v, u32 x, int i)
+{
+  /* v is passed by value, so it can be modified and returned directly */
+  v[i] = x;
+  return v;
+}
+#endif
+
+/* u8x8_shuffle (v, i): byte shuffle of the 8-byte vector v by the index
+ * vector i.
+ *  - With SSSE3 this is _mm_shuffle_pi8. That intrinsic belongs to SSSE3,
+ *    not SSE3, hence the __SSSE3__ guard.
+ *  - The clang fallback selects v[i[k] & 7] and then zeroes every lane whose
+ *    index byte has bit 7 set.
+ *  - The remaining fallback is a plain __builtin_shuffle.
+ *    NOTE(review): it applies no bit-7 zeroing - confirm callers never rely
+ *    on it. */
+#ifdef __SSSE3__
+#define u8x8_shuffle(v, i) (u8x8) _mm_shuffle_pi8 ((__m64) (v), (__m64) i)
+#elif defined(__clang__)
+static_always_inline u8x8
+u8x8_shuffle (u8x8 v, u8x8 i)
+{
+  u8x8 tmp = { 0 };
+  u16x8 tmp2;
+  tmp[0] = v[i[0] & 0x7];
+  tmp[1] = v[i[1] & 0x7];
+  tmp[2] = v[i[2] & 0x7];
+  tmp[3] = v[i[3] & 0x7];
+  tmp[4] = v[i[4] & 0x7];
+  tmp[5] = v[i[5] & 0x7];
+  tmp[6] = v[i[6] & 0x7];
+  tmp[7] = v[i[7] & 0x7];
+  /* Build a per-lane keep mask from bit 7 of each index byte:
+   *   bit 7 set:   128 -> 256 -> 255 -> ~ = 0xff00 -> low byte 0x00
+   *   bit 7 clear:   0 ->   0 ->   0 -> ~ = 0xffff -> low byte 0xff
+   * Subtracting a constant 1 instead of (tmp2 >> 8) yields a low byte of
+   * 0x00 in both cases, i.e. an all-zero result. */
+  tmp2 = __builtin_convertvector (i, u16x8);
+  tmp2 &= (u16x8){ 128, 128, 128, 128, 128, 128, 128, 128 };
+  tmp2 <<= 1;
+  tmp2 -= tmp2 >> 8;
+  tmp2 = ~tmp2;
+  tmp &= __builtin_convertvector (tmp2, u8x8);
+  return tmp;
+}
+#else
+#define u8x8_shuffle(v, i) __builtin_shuffle ((u8x8) v, (u8x8) i)
+#endif
+
+#ifndef CLIB_HAVE_VEC256 /* only defined here when that macro is absent */
+#define u32x8_splat(i) ((u32) (i) & (u32x8){ ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 })
+#endif
+
+#ifndef SHUFFLE /* maps to the compiler's two-operand vector shuffle builtin */
+#if defined(__clang__)
+#define SHUFFLE(v1, v2, i) __builtin_shufflevector ((v1), (v2), (i))
+#elif defined(__GNUC__)
+#define SHUFFLE(v1, v2, i) __builtin_shuffle ((v1), (v2), (i))
+#endif
+#endif
+
+#define u8x16_SHUFFLE(v1, v2, i) /* SHUFFLE with all operands cast to u8x16 */ \
+  (u8x16) SHUFFLE ((u8x16) (v1), (u8x16) (v2), (u8x16) (i))
+#define u32x8_SHUFFLE(v1, v2, i) /* SHUFFLE with all operands cast to u32x8 */ \
+  (u32x8) SHUFFLE ((u32x8) (v1), (u32x8) (v2), (u32x8) (i))
+
+/* u8x16_shuffle_dynamic (v, i): byte shuffle of the 16-byte vector v by the
+ * run-time index vector i; a lane whose index byte has bit 7 set yields 0.
+ * With SSSE3 this is _mm_shuffle_epi8 (an SSSE3 intrinsic, hence the
+ * __SSSE3__ guard); both fallbacks emulate the same bit-7 zeroing. */
+#ifdef __SSSE3__
+#define u8x16_shuffle_dynamic(v, i)                                           \
+  (u8x16) _mm_shuffle_epi8 ((__m128i) v, (__m128i) i)
+#elif defined(__clang__)
+static_always_inline u8x16
+u8x16_shuffle_dynamic (u8x16 v, u8x16 i)
+{
+  u8x16 tmp = { 0 };
+  u16x16 tmp2;
+  tmp[0] = v[i[0] & 0xf];
+  tmp[1] = v[i[1] & 0xf];
+  tmp[2] = v[i[2] & 0xf];
+  tmp[3] = v[i[3] & 0xf];
+  tmp[4] = v[i[4] & 0xf];
+  tmp[5] = v[i[5] & 0xf];
+  tmp[6] = v[i[6] & 0xf];
+  tmp[7] = v[i[7] & 0xf];
+  tmp[8] = v[i[8] & 0xf];
+  tmp[9] = v[i[9] & 0xf];
+  tmp[10] = v[i[10] & 0xf];
+  tmp[11] = v[i[11] & 0xf];
+  tmp[12] = v[i[12] & 0xf];
+  tmp[13] = v[i[13] & 0xf];
+  tmp[14] = v[i[14] & 0xf];
+  tmp[15] = v[i[15] & 0xf];
+  /* Per-lane keep mask from bit 7 of each index byte:
+   *   bit 7 set:   128 -> 256 -> 255 -> ~ = 0xff00 -> low byte 0x00
+   *   bit 7 clear:   0 ->   0 ->   0 -> ~ = 0xffff -> low byte 0xff */
+  tmp2 = __builtin_convertvector (i, u16x16);
+  tmp2 &= (u16x16){ 128, 128, 128, 128, 128, 128, 128, 128,
+                   128, 128, 128, 128, 128, 128, 128, 128 };
+  tmp2 <<= 1;
+  tmp2 -= tmp2 >> 8;
+  tmp2 = ~tmp2;
+  tmp &= __builtin_convertvector (tmp2, u8x16);
+  return tmp;
+}
+#else
+static_always_inline u8x16
+u8x16_shuffle_dynamic (u8x16 v, u8x16 i)
+{
+  u8x16 tmp = { 0 };
+  tmp = __builtin_shuffle (v, i);
+  /* (i >> 7) - 1 is 0x00 when bit 7 is set and 0xff otherwise */
+  i >>= 7;
+  i -= 1;
+  tmp &= i;
+  return tmp;
+}
+#endif
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+/* u32x8_shuffle_dynamic (v, i): permutes the eight u32 lanes of v by the
+ * run-time index vector i. Uses the AVX2 permute when available; the clang
+ * fallback picks lane (i[k] & 7) for every output lane k. */
+#ifdef __AVX2__
+#define u32x8_shuffle_dynamic(v, i)                                           \
+  (u32x8) _mm256_permutevar8x32_epi32 ((__m256i) v, (__m256i) i)
+#elif defined(__clang__)
+static_always_inline u32x8
+u32x8_shuffle_dynamic (u32x8 v, u32x8 i)
+{
+  u32x8 out = { 0 };
+  int lane;
+
+  for (lane = 0; lane < 8; lane++)
+    out[lane] = v[i[lane] & 0x7];
+  return out;
+}
+#else
+#define u32x8_shuffle_dynamic(v, i) __builtin_shuffle ((u32x8) v, (u32x8) i)
+#endif
+
+/* Returns a copy of x with lane idx replaced by y. */
+static_always_inline u32x2
+u32x2_insert (u32x2 x, u32 y, uword idx)
+{
+  /* x is a by-value copy, so it can be updated and returned directly */
+  x[idx] = y;
+  return x;
+}
+
+/* Returns a copy of x with lane idx replaced by y. */
+static_always_inline u8x8
+u8x8_insert (u8x8 x, u8 y, uword idx)
+{
+  /* x is a by-value copy, so it can be updated and returned directly */
+  x[idx] = y;
+  return x;
+}
+#pragma GCC diagnostic pop
+__clib_unused static const u8 l4_mask_bits[256] = { /* indexed by IP protocol
+   * number; protocols not listed are 0. NOTE(review): presumably the number
+   * of L4 header bits that take part in the session key - confirm */
+  [IP_PROTOCOL_ICMP] = 16,     [IP_PROTOCOL_IGMP] = 8,
+  [IP_PROTOCOL_ICMP6] = 16,    [IP_PROTOCOL_TCP] = 32,
+  [IP_PROTOCOL_UDP] = 32,      [IP_PROTOCOL_IPSEC_ESP] = 32,
+  [IP_PROTOCOL_IPSEC_AH] = 32,
+};
+
+/* L4 data offset to copy into session */
+__clib_unused static const u8 l4_offset_32w[256] = { /* indexed by IP protocol
+   * number; 1 for ICMP and ICMP6, 0 for everything else */
+  [IP_PROTOCOL_ICMP] = 1, [IP_PROTOCOL_ICMP6] = 1
+};
+
+/* TODO: add ICMP, ESP, and AH (+ additional
+ * branching or lookup for different
+ * shuffling mask) */
+__clib_unused static const u64 tcp_udp_bitmask =
+  (1ULL << IP_PROTOCOL_TCP) | (1ULL << IP_PROTOCOL_UDP);
+
+#endif /* __included_lookup_common_h__ */
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/lookup_inlines.h b/src/vnet/sfdp/lookup/lookup_inlines.h
new file mode 100644 (file)
index 0000000..6b136f5
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_inlines_h__
+#define __included_lookup_inlines_h__
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vnet/ip/ip4.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/lookup.h>
+#include <vnet/sfdp/common.h>
+
+#include <vnet/sfdp/lookup/lookup_ip4.h>
+#include <vnet/sfdp/lookup/lookup_ip6.h>
+#endif /* __included_lookup_inlines_h__ */
diff --git a/src/vnet/sfdp/lookup/lookup_ip4.h b/src/vnet/sfdp/lookup/lookup_ip4.h
new file mode 100644 (file)
index 0000000..2aa4011
--- /dev/null
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_lookup_ip4_h__
+#define __included_sfdp_lookup_ip4_h__
+#include <vlib/vlib.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/lookup.h>
+#include <vnet/sfdp/lookup/lookup_common.h>
+/* One bit per ICMPv4 type number (bit n <=> type n): types handled as ping,
+ * i.e. echo request and echo reply. */
+static const u64 icmp4_type_ping_bitmask =
+  (1ULL << ICMP4_echo_request) | (1ULL << ICMP4_echo_reply);
+
+/* Same encoding: ICMPv4 types dispatched to the ICMP error slow-path node. */
+static const u64 icmp4_type_errors_bitmask =
+  (1ULL << ICMP4_destination_unreachable) | (1ULL << ICMP4_redirect) |
+  (1ULL << ICMP4_time_exceeded);
+
+/* Host-order mask over flags_and_fragment_offset: the more-fragments flag
+ * plus the low 13 bits.  Any bit set means the packet is a fragment. */
+#define IP4_REASS_NEEDED_FLAGS                                                \
+  ((u16) IP4_HEADER_FLAG_MORE_FRAGMENTS | (u16) ((1 << 13) - 1))
+
+/* Byte indices applied with u8x16_shuffle_dynamic to the 16-byte work vector
+ * (first L4 word in bytes 0-3, then bytes 8-19 of the IPv4 header).  This
+ * variant keeps bytes 0-3, byte 5 and bytes 8-15 in place; -1 lanes are
+ * presumably zeroed by the shuffle (index with bit 7 set). */
+#define KEY_IP4_SHUFF_NO_NORM                                                 \
+  0, 1, 2, 3, -1, 5, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15
+
+/* Normalizing variant: additionally swaps bytes 0-1 with 2-3 and bytes 8-11
+ * with 12-15. */
+#define KEY_IP4_SHUFF_NORM                                                    \
+  2, 3, 0, 1, -1, 5, -1, -1, 12, 13, 14, 15, 8, 9, 10, 11
+
+/* Indices for u8x16_shuffle2 (k, zero, ...): bytes 8-11 (resp. 12-15) of k in
+ * reversed order, repeated in both 64-bit halves.  Index 16 presumably picks
+ * a byte of the second (zero) operand - confirm against u8x16_shuffle2. */
+#define SRC_IP4_BYTESWAP_X2                                                   \
+  11, 10, 9, 8, 16, 16, 16, 16, 11, 10, 9, 8, 16, 16, 16, 16
+#define DST_IP4_BYTESWAP_X2                                                   \
+  15, 14, 13, 12, 16, 16, 16, 16, 15, 14, 13, 12, 16, 16, 16, 16
+
+/* Indices for the extra ICMP step: bytes 2,3,0,1 of the key in the first four
+ * lanes, index 16 everywhere else.  The result is added to the key. */
+#define KEY_IP4_SWAP_ICMP                                                     \
+  2, 3, 0, 1, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+
+/* Vector forms of the two key shuffles; their difference is used to switch
+ * between them without branching. */
+static const u8x16 key_ip4_shuff_no_norm = { KEY_IP4_SHUFF_NO_NORM };
+
+static const u8x16 key_ip4_shuff_norm = { KEY_IP4_SHUFF_NORM };
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+/*
+ * Compute the IPv4 session key of one buffer and hash it.
+ *
+ * b             buffer whose current data points at the IPv4 header
+ * context_id    value stored in skey->context_id
+ * skey          output: session key
+ * lookup_val    output: either a slow-path dispatch value
+ *               (slow-path node id << 32 | SFDP_LV_TO_SP) or, when a key was
+ *               produced, bit 0 set if source and destination were swapped
+ * h             output: clib_bihash_hash_24_8 of the key
+ * l4_hdr_offset output: offset of the L4 header relative to b->data
+ * slow_path     0 for the branch-light pass, 1 to enable reassembly, ICMP
+ *               and unknown-protocol handling
+ *
+ * Returns non-zero when the packet needs the slow-path pass (not TCP/UDP,
+ * fragmented, or flagged as fully reassembled).  With slow_path == 0 and a
+ * non-zero return the outputs are not meaningful and are meant to be
+ * recomputed with slow_path == 1.
+ */
+static_always_inline u8
+sfdp_calc_key_v4 (vlib_buffer_t *b, u32 context_id,
+                  sfdp_session_ip4_key_t *skey, u64 *lookup_val, u64 *h,
+                  i16 *l4_hdr_offset, u8 slow_path)
+{
+  u8 pr;
+  i64x2 norm, zero = {};
+  u8x16 k, swap;
+  u32 l4_hdr;
+  void *next_header;
+  ip4_header_t *ip = vlib_buffer_get_current (b);
+  u8 slowpath_needed;
+  u8 reass_needed;
+  u8 l4_from_sv_reass = 0;
+  u8 from_full_reass;
+  u8 tcp_or_udp;
+  u8 unknown_protocol;
+
+  /* load 16 bytes starting at offset 4 of the ip header (bytes 4..19, which
+   * end with the source and destination addresses) */
+  k = *(u8x16u *) ((u8 *) ip + 4);
+  pr = ip->protocol;
+  next_header = ip4_next_header (ip);
+  l4_hdr_offset[0] = (u8 *) next_header - b->data;
+
+  reass_needed = !!(ip->flags_and_fragment_offset &
+                    clib_host_to_net_u16 (IP4_REASS_NEEDED_FLAGS));
+  tcp_or_udp = pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP;
+  unknown_protocol = !tcp_or_udp && pr != IP_PROTOCOL_ICMP;
+  /* normalise to 0/1 so the flag survives the store into a u8 whatever its
+   * bit position */
+  from_full_reass =
+    !!(sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_FULL_REASSEMBLED);
+  slowpath_needed = !tcp_or_udp || reass_needed || from_full_reass;
+
+  if (slow_path && reass_needed &&
+      sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_SV_REASSEMBLED)
+    {
+      /* This packet comes back from shallow virtual reassembly */
+      l4_from_sv_reass = 1;
+    }
+  else if (slow_path && reass_needed)
+    {
+      /* Reassembly is needed and has not been done yet */
+      lookup_val[0] = (u64) SFDP_SP_NODE_IP4_REASS << 32 | SFDP_LV_TO_SP;
+      return slowpath_needed;
+    }
+
+  /* non TCP, UDP or ICMP packets are going to slowpath */
+  if (slow_path && unknown_protocol)
+    {
+      lookup_val[0] =
+        (u64) SFDP_SP_NODE_IP4_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP;
+      /*
+       * full_reass will change the sfdp buf, need to restore it
+       * before returning.
+       */
+      if (from_full_reass)
+        goto restore_sfdp_buf;
+
+      return slowpath_needed;
+    }
+
+  /* byteswap src and dst ip into the low 32 bits of both 64-bit lanes, then
+   * compare: the result is all ones when src > dst, i.e. when src and dst
+   * must be swapped.  The upper 32 bits of each lane are taken from the zero
+   * operand, so the signed 64-bit comparison orders the addresses as
+   * unsigned values. */
+  norm = (((i64x2) u8x16_shuffle2 (k, zero, SRC_IP4_BYTESWAP_X2)) >
+          ((i64x2) u8x16_shuffle2 (k, zero, DST_IP4_BYTESWAP_X2)));
+
+  if (slow_path && pr == IP_PROTOCOL_ICMP)
+    {
+      u8 type;
+      u64 type_bit;
+      i64 x, y;
+
+      if (l4_from_sv_reass)
+        type = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
+      else
+        {
+          icmp46_header_t *icmp = next_header;
+          type = icmp->type;
+        }
+      /* Types >= 64 have no bit in the 64-bit masks, and shifting a 64-bit
+       * value by 64 or more is undefined: treat them as matching nothing */
+      type_bit = type < 64 ? 1ULL << type : 0;
+      x = type_bit & icmp4_type_ping_bitmask;
+      y = type_bit & icmp4_type_errors_bitmask;
+      if (x == 0)
+        {
+          /* If it's a known ICMP error, treat in the specific slowpath (with
+             a lookup on inner packet), otherwise, it's an unknown protocol */
+          lookup_val[0] =
+            y ? (u64) SFDP_SP_NODE_IP4_ICMP4_ERROR << 32 | SFDP_LV_TO_SP :
+                (u64) SFDP_SP_NODE_IP4_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP;
+          /*
+           * full_reass will change the sfdp buf, need to restore it
+           * before returning.
+           */
+          if (from_full_reass)
+            goto restore_sfdp_buf;
+
+          return slowpath_needed;
+        }
+      norm &= i64x2_splat (x) != zero;
+    }
+  else
+    {
+      /* only TCP and UDP are normalised here; protocol numbers >= 64 are
+       * excluded before shifting to keep the shift count in range */
+      norm &=
+        i64x2_splat (pr < 64 ? (1ULL << pr) & tcp_udp_bitmask : 0) != zero;
+    }
+  swap = key_ip4_shuff_no_norm;
+  /* if norm is zero, we don't need to normalize so nothing happens here */
+  swap += (key_ip4_shuff_norm - key_ip4_shuff_no_norm) & (u8x16) norm;
+
+  /* overwrite first 4 bytes with first 0 - 4 bytes of l4 header */
+  if (slow_path && l4_from_sv_reass)
+    {
+      u16 src_port, dst_port;
+      src_port = vnet_buffer (b)->ip.reass.l4_src_port;
+      dst_port = vnet_buffer (b)->ip.reass.l4_dst_port;
+      /* widen before shifting so bit 15 of dst_port is not shifted into the
+       * sign bit of a promoted int */
+      l4_hdr = (u32) dst_port << 16 | src_port;
+      /* Mask seqnum field out for ICMP */
+      /* NOTE(review): the non-reassembled path keeps 16 bits for ICMP
+       * (l4_mask_bits); this keeps only 8 - confirm 0xff is intended */
+      if (pr == IP_PROTOCOL_ICMP)
+        l4_hdr &= 0xff;
+    }
+  else if (slow_path)
+    l4_hdr = ((u32 *) next_header + l4_offset_32w[pr])[0] &
+             pow2_mask (l4_mask_bits[pr]);
+  else
+    l4_hdr = *(u32 *) next_header & pow2_mask (l4_mask_bits[pr]);
+  k = (u8x16) u32x4_insert ((u32x4) k, l4_hdr, 0);
+
+  k = u8x16_shuffle_dynamic (k, swap);
+
+  /* Reshuffle for ICMP
+     TODO: merge with fast path? */
+  if (slow_path && pr == IP_PROTOCOL_ICMP)
+    k += u8x16_shuffle2 (k, zero, KEY_IP4_SWAP_ICMP);
+  lookup_val[0] = ((u32x4) norm)[0] & 0x1;
+
+  /* extract tcp flags */
+  /* NOTE(review): when from_full_reass is set, the restore block below
+   * overwrites sfdp_buffer tcp_flags with the sfdp_buffer2 copy */
+  if (slow_path && l4_from_sv_reass && pr == IP_PROTOCOL_TCP)
+    sfdp_buffer2 (b)->tcp_flags =
+      vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
+  else if (pr == IP_PROTOCOL_TCP)
+    /* the flags are the byte at offset 13 of the TCP header */
+    sfdp_buffer (b)->tcp_flags = ((u8 *) next_header)[13];
+  else
+    sfdp_buffer (b)->tcp_flags = 0;
+
+  /* store key */
+  skey->ip4_key.as_u8x16 = k;
+  skey->context_id = context_id;
+  clib_memset (skey->zeros, 0, sizeof (skey->zeros));
+  /* calculate hash */
+  h[0] = clib_bihash_hash_24_8 ((clib_bihash_kv_24_8_t *) (skey));
+
+  if (slow_path && (l4_from_sv_reass || from_full_reass))
+    {
+    restore_sfdp_buf:
+      /* Restore sfdp_buffer */
+      /* TODO: optimise save/restore ? */
+      sfdp_buffer (b)->flags = sfdp_buffer2 (b)->flags;
+      sfdp_buffer (b)->service_bitmap = sfdp_buffer2 (b)->service_bitmap;
+      sfdp_buffer (b)->tcp_flags = sfdp_buffer2 (b)->tcp_flags;
+      sfdp_buffer (b)->ip6_final_proto = sfdp_buffer2 (b)->ip6_final_proto;
+      sfdp_buffer (b)->tenant_index = sfdp_buffer2 (b)->tenant_index;
+      sfdp_buffer (b)->session_version_before_handoff =
+        sfdp_buffer2 (b)->session_version_before_handoff;
+
+      /* Clear */
+      sfdp_buffer2 (b)->flags = 0;
+      sfdp_buffer2 (b)->service_bitmap = 0;
+      sfdp_buffer2 (b)->tcp_flags = 0;
+      sfdp_buffer2 (b)->ip6_final_proto = 0;
+      sfdp_buffer2 (b)->tenant_index = 0;
+      sfdp_buffer2 (b)->session_version_before_handoff = 0;
+    }
+
+  /* If slowpath needed == 1, we may have done a lot of useless work that will
+   be overwritten, but we avoid too much branching in fastpath */
+  return slowpath_needed;
+}
+#pragma GCC diagnostic pop
+#endif
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/lookup_ip6.h b/src/vnet/sfdp/lookup/lookup_ip6.h
new file mode 100644 (file)
index 0000000..87aedef
--- /dev/null
@@ -0,0 +1,269 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_lookup_ip6_h__
+#define __included_sfdp_lookup_ip6_h__
+#include <vlib/vlib.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/lookup.h>
+#include <vnet/sfdp/lookup/lookup_common.h>
+
+/* ICMPv6 echo request and echo reply are types 128 & 129.  In the "_128off"
+ * masks bit n stands for ICMPv6 type n + 128. */
+static const u64 icmp6_type_ping_bitmask_128off =
+  (1ULL << (ICMP6_echo_request - 128)) | (1ULL << (ICMP6_echo_reply - 128));
+
+/* ICMPv6 error types below 64, one bit per type number (bit n <=> type n). */
+static const u64 icmp6_type_errors_bitmask =
+  (1ULL << ICMP6_destination_unreachable) | (1ULL << ICMP6_time_exceeded);
+
+/* ICMPv6 types of 128 and above that are dispatched like errors (bit n <=>
+ * type n + 128). */
+static const u64 icmp6_type_errors_bitmask_128off =
+  (1ULL << (ICMP6_redirect - 128));
+
+/* Shuffle index lists used to build the IPv6 key.
+ *  *_A: byte indices over the first 8 bytes of the key, whose first four
+ *       bytes hold the L4 word; NORM swaps bytes 0-1 with 2-3.
+ *  *_B: 32-bit lane indices over the 32 bytes holding both addresses; NORM
+ *       swaps lanes 0-3 with lanes 4-7.
+ *  IP6_BYTESWAP: reverses the 16 bytes of one address.
+ *  KEY_IP6_SWAP_ICMP: bytes 2,3,0,1 then -1 lanes, for the extra ICMP step.
+ * How -1 lanes are treated depends on u8x8_shuffle, which is not defined
+ * here - presumably they produce zero. */
+#define KEY_IP6_SHUFF_NO_NORM_A 0, 1, 2, 3, -1, -1, 6, -1
+#define KEY_IP6_SHUFF_NORM_A   2, 3, 0, 1, -1, -1, 6, -1
+#define KEY_IP6_SHUFF_NO_NORM_B 0, 1, 2, 3, 4, 5, 6, 7
+#define KEY_IP6_SHUFF_NORM_B   4, 5, 6, 7, 0, 1, 2, 3
+#define IP6_BYTESWAP           15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
+#define KEY_IP6_SWAP_ICMP      2, 3, 0, 1, -1, -1, -1, -1
+
+/* Vector forms of the index lists above. */
+static const u8x8 key_ip6_shuff_no_norm_A = { KEY_IP6_SHUFF_NO_NORM_A };
+static const u8x8 key_ip6_shuff_norm_A = { KEY_IP6_SHUFF_NORM_A };
+static const u32x8 key_ip6_shuff_no_norm_B = { KEY_IP6_SHUFF_NO_NORM_B };
+static const u32x8 key_ip6_shuff_norm_B = { KEY_IP6_SHUFF_NORM_B };
+static const u8x8 key_ip6_swap_icmp = { KEY_IP6_SWAP_ICMP };
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wpsabi"
+/*
+ * Compute the IPv6 session key of one buffer and hash it.
+ *
+ * b             buffer whose current data points at the IPv6 header
+ * context_id    value stored in skey->context_id
+ * skey          output: session key
+ * lookup_val    output: either a slow-path dispatch value
+ *               (slow-path node id << 32 | SFDP_LV_TO_SP) or, when a key was
+ *               produced, bit 0 set if source and destination were swapped
+ * h             output: clib_bihash_hash_48_8 of the key
+ * l4_hdr_offset output: offset of the L4 header relative to b->data (not
+ *               written when the function returns early for reassembly)
+ * slow_path     0 for the branch-light pass, 1 to enable extension header,
+ *               reassembly, ICMPv6 and unknown-protocol handling
+ *
+ * Returns non-zero when the packet needs the slow-path pass (next header is
+ * not TCP/UDP, or the buffer is flagged as fully reassembled).  With
+ * slow_path == 0 and a non-zero return the outputs are not meaningful and
+ * are meant to be recomputed with slow_path == 1.
+ */
+static_always_inline u8
+sfdp_calc_key_v6 (vlib_buffer_t *b, u32 context_id,
+                  sfdp_session_ip6_key_t *skey, u64 *lookup_val, u64 *h,
+                  i16 *l4_hdr_offset, u8 slow_path)
+{
+  u8 pr;
+  i64x2 norm, norm_reverse, zero = {};
+  /* 40-byte work area: first 8 bytes of the header followed by the two
+   * 16-byte addresses, viewed with three different lane widths */
+  union
+  {
+    struct
+    {
+      u32x2u as_u32x2;
+      u32x8u as_u32x8;
+    };
+    struct
+    {
+      u8x8u as_u8x8;
+      u8x16u as_u8x16[2];
+    };
+    struct
+    {
+      u64 as_u64;
+      u64x4u as_u64x4;
+    };
+  } k;
+  u8x8 swap_A;
+  u32x8 swap_B;
+  STATIC_ASSERT_SIZEOF (k, 40);
+  u8x16 src_ip6, dst_ip6;
+  u32 l4_hdr;
+  void *next_header;
+  u8 *data = vlib_buffer_get_current (b);
+  ip6_header_t *ip = (void *) data;
+  int slowpath_needed;
+  u8 ext_hdr = 0;
+  u8 l4_from_sv_reass = 0;
+  u8 from_full_reass;
+  u8 tcp_or_udp;
+  u8 unknown_protocol;
+
+  /* loads 40 bytes of ip6 header */
+  k.as_u32x2 = *(u32x2u *) data;
+  k.as_u32x8 = *(u32x8u *) (data + 8);
+
+  if (slow_path && PREDICT_FALSE (sfdp_buffer (b)->flags &
+                                  SFDP_BUFFER_FLAG_IP6_FINAL_PROTO_VALID))
+    {
+      /* final protocol and L4 offset were already recorded in the buffer;
+       * ext_hdr stays 0 so the extension header chain is not walked again */
+      pr = sfdp_buffer (b)->ip6_final_proto;
+      next_header = b->data + vnet_buffer (b)->l4_hdr_offset;
+      k.as_u8x8 = u8x8_insert (k.as_u8x8, pr, 6); /* use final proto in key */
+    }
+  else
+    {
+      pr = ip->protocol;
+      ext_hdr = ip6_ext_hdr (pr);
+      next_header = ip6_next_header (ip);
+    }
+
+  tcp_or_udp = pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP;
+  /* normalise to 0/1 so the flag survives the store into a u8 whatever its
+   * bit position */
+  from_full_reass =
+    !!(sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_FULL_REASSEMBLED);
+  slowpath_needed = !tcp_or_udp || from_full_reass;
+
+  /* Reverse the bytes of both addresses so that each can be compared as two
+   * unsigned 64-bit lanes (on a little-endian host lane 1 then holds the
+   * most significant half).  norm ends up all ones when src > dst, i.e.
+   * when src and dst must be swapped. */
+  src_ip6 = u8x16_shuffle2 (k.as_u8x16[0], zero, IP6_BYTESWAP);
+  dst_ip6 = u8x16_shuffle2 (k.as_u8x16[1], zero, IP6_BYTESWAP);
+  norm = (u64x2) src_ip6 > (u64x2) dst_ip6;
+  norm_reverse = (u64x2) src_ip6 < (u64x2) dst_ip6;
+  /* high halves decide; the low halves break the tie when they are equal */
+  norm = i64x2_splat (norm[1] | (~norm_reverse[1] & norm[0]));
+
+  if (slow_path && sfdp_buffer2 (b)->flags & SFDP_BUFFER_FLAG_SV_REASSEMBLED)
+    {
+      /* This packet comes back from shallow virtual reassembly */
+      l4_from_sv_reass = 1;
+    }
+  if (slow_path && ext_hdr)
+    {
+      /* Parse the extension header chain and look for fragmentation */
+      ip6_ext_hdr_chain_t chain = { 0 };
+      int res =
+        ip6_ext_header_walk (b, ip, IP_PROTOCOL_IPV6_FRAGMENTATION, &chain);
+      if (!(l4_from_sv_reass || from_full_reass) && res >= 0 &&
+          chain.eh[res].protocol == IP_PROTOCOL_IPV6_FRAGMENTATION)
+        {
+          /* Reassembly is needed and has not been done yet */
+          lookup_val[0] = (u64) SFDP_SP_NODE_IP6_REASS << 32 | SFDP_LV_TO_SP;
+          return slowpath_needed;
+        }
+      else
+        {
+          /* NOTE(review): assumes the walk recorded at least one entry
+           * (chain.length >= 1) - confirm against ip6_ext_header_walk */
+          next_header =
+            ip6_ext_next_header_offset (ip, chain.eh[chain.length - 1].offset);
+          pr = chain.eh[chain.length - 1].protocol;
+          tcp_or_udp = pr == IP_PROTOCOL_TCP || pr == IP_PROTOCOL_UDP;
+          k.as_u8x8 =
+            u8x8_insert (k.as_u8x8, pr, 6); /* use final proto in key */
+        }
+    }
+  l4_hdr_offset[0] = (u8 *) next_header - b[0].data;
+  unknown_protocol = !tcp_or_udp && pr != IP_PROTOCOL_ICMP6;
+
+  if (slow_path && unknown_protocol)
+    {
+      lookup_val[0] =
+        (u64) SFDP_SP_NODE_IP6_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP;
+      /*
+       * full_reass will change the sfdp buf, need to restore it
+       * before returning.
+       */
+      if (from_full_reass)
+        goto restore_sfdp_buf;
+
+      return slowpath_needed;
+    }
+
+  if (slow_path && pr == IP_PROTOCOL_ICMP6)
+    {
+      u8 type;
+      i64 x, y, t, t128;
+      if (l4_from_sv_reass)
+        type = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
+      else
+        {
+          icmp46_header_t *icmp = next_header;
+          type = icmp->type;
+        }
+      /* t covers types 0..63 and t128 covers types 128..191.  Outside those
+       * ranges the shift count would be >= 64, which is undefined and could
+       * alias an unrelated type onto a mask bit, so the bit is left clear. */
+      t = type < 64 ? 1ULL << type : 0;
+      t128 = (type >= 128 && type < 192) ? 1ULL << (type - 128) : 0;
+      x = t128 & icmp6_type_ping_bitmask_128off;
+      y = t & icmp6_type_errors_bitmask;
+      y |= t128 & icmp6_type_errors_bitmask_128off;
+      if (x == 0)
+        {
+          /* If it's a known ICMP error, treat in the specific slowpath (with
+             a lookup on inner packet), otherwise, it's an unknown protocol */
+          lookup_val[0] =
+            y ? (u64) SFDP_SP_NODE_IP6_ICMP6_ERROR << 32 | SFDP_LV_TO_SP :
+                (u64) SFDP_SP_NODE_IP6_UNKNOWN_PROTO << 32 | SFDP_LV_TO_SP;
+          /*
+           * full_reass will change the sfdp buf, need to restore it
+           * before returning.
+           */
+          if (from_full_reass)
+            goto restore_sfdp_buf;
+
+          return slowpath_needed;
+        }
+      norm &= i64x2_splat (x) != zero;
+    }
+  else
+    {
+      /* only TCP and UDP are normalised here; protocol numbers >= 64 are
+       * excluded before shifting to keep the shift count in range */
+      norm &=
+        i64x2_splat (pr < 64 ? (1ULL << pr) & tcp_udp_bitmask : 0) != zero;
+    }
+  swap_A = key_ip6_shuff_no_norm_A;
+  swap_B = key_ip6_shuff_no_norm_B;
+
+  /* if norm is zero, we don't need to normalize so nothing happens here */
+  swap_A += (key_ip6_shuff_norm_A - key_ip6_shuff_no_norm_A) & (u8x8) norm[0];
+  swap_B +=
+    (key_ip6_shuff_norm_B - key_ip6_shuff_no_norm_B) & u32x8_splat (norm[0]);
+
+  /* overwrite first 4 bytes with first 0 - 4 bytes of l4 header */
+  if (slow_path && l4_from_sv_reass)
+    {
+      u16 src_port, dst_port;
+      src_port = vnet_buffer (b)->ip.reass.l4_src_port;
+      dst_port = vnet_buffer (b)->ip.reass.l4_dst_port;
+      /* widen before shifting so bit 15 of dst_port is not shifted into the
+       * sign bit of a promoted int */
+      l4_hdr = (u32) dst_port << 16 | src_port;
+      /* Mask seqnum field out for ICMP */
+      /* NOTE(review): the non-reassembled path keeps 16 bits for ICMPv6
+       * (l4_mask_bits); this keeps only 8 - confirm 0xff is intended */
+      if (pr == IP_PROTOCOL_ICMP6)
+        l4_hdr &= 0xff;
+    }
+  else if (slow_path)
+    l4_hdr = ((u32 *) next_header + l4_offset_32w[pr])[0] &
+             pow2_mask (l4_mask_bits[pr]);
+  else
+    l4_hdr = *(u32 *) next_header & pow2_mask (l4_mask_bits[pr]);
+
+  k.as_u32x2 = u32x2_insert (k.as_u32x2, l4_hdr, 0);
+
+  k.as_u8x8 = u8x8_shuffle (k.as_u8x8, swap_A);
+  k.as_u32x8 = u32x8_shuffle_dynamic (k.as_u32x8, swap_B);
+  /* Reshuffle for ICMP
+     TODO: merge with fast path? */
+  if (slow_path && pr == IP_PROTOCOL_ICMP6)
+    k.as_u8x8 += u8x8_shuffle (k.as_u8x8, key_ip6_swap_icmp);
+  lookup_val[0] = ((u32x4) norm)[0] & 0x1;
+
+  /* extract tcp flags */
+  /* NOTE(review): when from_full_reass is set, the restore block below
+   * overwrites sfdp_buffer tcp_flags with the sfdp_buffer2 copy */
+  if (slow_path && l4_from_sv_reass && pr == IP_PROTOCOL_TCP)
+    sfdp_buffer2 (b)->tcp_flags =
+      vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
+  else if (pr == IP_PROTOCOL_TCP)
+    /* the flags are the byte at offset 13 of the TCP header */
+    sfdp_buffer (b)->tcp_flags = ((u8 *) next_header)[13];
+  else
+    sfdp_buffer (b)->tcp_flags = 0;
+
+  /* store key */
+  skey->ip6_key.as_u64 = k.as_u64;
+  skey->ip6_key.as_u64x4 = k.as_u64x4;
+  skey->context_id = context_id;
+  clib_memset (skey->zeros, 0, sizeof (skey->zeros));
+  /* calculate hash */
+  h[0] = clib_bihash_hash_48_8 ((clib_bihash_kv_48_8_t *) (skey));
+
+  if (slow_path && (l4_from_sv_reass || from_full_reass))
+    {
+    restore_sfdp_buf:
+      /* Restore sfdp_buffer */
+      /* TODO: optimise save/restore ? */
+      /* NOTE(review): unlike sfdp_calc_key_v4, ip6_final_proto is neither
+       * restored nor cleared here - confirm this is intended */
+      sfdp_buffer (b)->flags = sfdp_buffer2 (b)->flags;
+      sfdp_buffer (b)->service_bitmap = sfdp_buffer2 (b)->service_bitmap;
+      sfdp_buffer (b)->tcp_flags = sfdp_buffer2 (b)->tcp_flags;
+      sfdp_buffer (b)->tenant_index = sfdp_buffer2 (b)->tenant_index;
+      sfdp_buffer (b)->session_version_before_handoff =
+        sfdp_buffer2 (b)->session_version_before_handoff;
+
+      /* Clear */
+      sfdp_buffer2 (b)->flags = 0;
+      sfdp_buffer2 (b)->service_bitmap = 0;
+      sfdp_buffer2 (b)->tcp_flags = 0;
+      sfdp_buffer2 (b)->tenant_index = 0;
+      sfdp_buffer2 (b)->session_version_before_handoff = 0;
+    }
+  /* If slowpath needed == 1, we may have done a lot of useless work that will
+   be overwritten, but we avoid too much branching in fastpath */
+  return slowpath_needed;
+}
+#pragma GCC diagnostic pop
+#endif /* __included_sfdp_lookup_ip6_h__ */
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/node.c b/src/vnet/sfdp/lookup/node.c
new file mode 100644 (file)
index 0000000..c6861cc
--- /dev/null
@@ -0,0 +1,938 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vnet/ethernet/ethernet.h>
+#include <vppinfra/error.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/service.h>
+#include <vnet/sfdp/sfdp_funcs.h>
+#include "lookup_inlines.h"
+#include "lookup.h"
+
+/* X-macro list of handoff errors.  Each entry is
+ * _ (ENUM_SUFFIX, counter_name, SEVERITY, "description"). */
+#define foreach_sfdp_handoff_error                                            \
+  _ (SESS_DROP, sess_drop, INFO, "Session expired during handoff")            \
+  _ (NOERROR, noerror, INFO, "no error")
+
+/* One SFDP_HANDOFF_ERROR_<ENUM_SUFFIX> value per list entry, in list order,
+ * followed by the entry count. */
+typedef enum
+{
+#define _(f, n, s, d) SFDP_HANDOFF_ERROR_##f,
+  foreach_sfdp_handoff_error
+#undef _
+    SFDP_HANDOFF_N_ERROR,
+} sfdp_handoff_error_t;
+
+/* Counter descriptors built from the same list, so the array index matches
+ * the enum value: { name string, description, VL_COUNTER_SEVERITY_<s> }. */
+static vlib_error_desc_t sfdp_handoff_error_counters[] = {
+#define _(f, n, s, d) { #n, d, VL_COUNTER_SEVERITY_##s },
+  foreach_sfdp_handoff_error
+#undef _
+};
+
+/* Per-packet trace record of the lookup node.
+ * NOTE(review): the code that fills and formats this record is not shown
+ * here; the field notes below are inferred from the names - confirm. */
+typedef struct
+{
+  u32 sw_if_index;
+  /* session key; presumably k6 is the valid member when is_ip6 is set */
+  union
+  {
+    sfdp_session_ip4_key_t k4;
+    sfdp_session_ip6_key_t k6;
+  };
+  u8 is_ip6;
+  /* presumably set when the packet was sent to a slow-path node, in which
+   * case the second struct of the union below applies */
+  u8 is_sp;
+  union
+  {
+    /* regular lookup result */
+    struct
+    {
+      u32 next_index;
+      u64 hash;
+      u32 flow_id;
+    };
+    /* slow-path dispatch */
+    struct
+    {
+      u32 sp_index;
+      u32 sp_node_index;
+    };
+  };
+} sfdp_lookup_trace_t;
+
+/* Per-packet trace record of the handoff node: the next node index and the
+ * flow id carried by the packet (field use is not visible here - confirm). */
+typedef struct
+{
+  u32 next_index;
+  u32 flow_id;
+} sfdp_handoff_trace_t;
+
+/* IPv4 flavour of session creation: forwards every argument unchanged to
+ * sfdp_create_session_inline with a trailing 0 (presumably its "is IPv6"
+ * selector - the helper is defined elsewhere) and returns its result. */
+static_always_inline int
+sfdp_create_session_v4 (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                        sfdp_tenant_t *tenant, u16 tenant_idx,
+                        u32 thread_index, f64 time_now, void *k, u64 *h,
+                        u64 *lookup_val, u32 scope_index)
+{
+  int rv;
+
+  rv = sfdp_create_session_inline (sfdp, ptd, tenant, tenant_idx,
+                                   thread_index, time_now, k, h, lookup_val,
+                                   scope_index, 0);
+  return rv;
+}
+
+/* IPv6 flavour of session creation: forwards every argument unchanged to
+ * sfdp_create_session_inline with a trailing 1 (presumably its "is IPv6"
+ * selector - the helper is defined elsewhere) and returns its result. */
+static_always_inline int
+sfdp_create_session_v6 (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                        sfdp_tenant_t *tenant, u16 tenant_idx,
+                        u32 thread_index, f64 time_now, void *k, u64 *h,
+                        u64 *lookup_val, u32 scope_index)
+{
+  int rv;
+
+  rv = sfdp_create_session_inline (sfdp, ptd, tenant, tenant_idx,
+                                   thread_index, time_now, k, h, lookup_val,
+                                   scope_index, 1);
+  return rv;
+}
+
+/* Compute the IPv4 keys of four consecutive buffers b[0..3], writing entry n
+ * of k, lookup_val, h and l4_hdr_offset for buffer n.  When
+ * prefetch_buffer_stride is non-zero, buffer b[stride + n] and its data are
+ * prefetched just before buffer n is processed.
+ * Returns the OR of the four sfdp_calc_key_v4 results, i.e. non-zero when at
+ * least one of the buffers needs the slow path. */
+static_always_inline u8
+sfdp_lookup_four_v4 (vlib_buffer_t **b, sfdp_session_ip4_key_t *k,
+                     u64 *lookup_val, u64 *h, i16 *l4_hdr_offset,
+                     int prefetch_buffer_stride, u8 slowpath)
+{
+  vlib_buffer_t **ahead = b + prefetch_buffer_stride;
+  u8 any_slow = 0;
+
+  for (int n = 0; n < 4; n++)
+    {
+      if (prefetch_buffer_stride)
+        {
+          clib_prefetch_load (ahead[n]);
+          clib_prefetch_load (ahead[n]->data);
+        }
+
+      any_slow |=
+        sfdp_calc_key_v4 (b[n], b[n]->flow_id, k + n, lookup_val + n, h + n,
+                          l4_hdr_offset + n, slowpath);
+    }
+  return any_slow;
+}
+
+/* Compute the IPv6 keys of four consecutive buffers b[0..3], writing entry n
+ * of k, lookup_val, h and l4_hdr_offset for buffer n.  When
+ * prefetch_buffer_stride is non-zero, buffer b[stride + n] and its data are
+ * prefetched just before buffer n is processed.
+ * Returns the OR of the four sfdp_calc_key_v6 results, i.e. non-zero when at
+ * least one of the buffers needs the slow path. */
+static_always_inline u8
+sfdp_lookup_four_v6 (vlib_buffer_t **b, sfdp_session_ip6_key_t *k,
+                     u64 *lookup_val, u64 *h, i16 *l4_hdr_offset,
+                     int prefetch_buffer_stride, u8 slowpath)
+{
+  vlib_buffer_t **ahead = b + prefetch_buffer_stride;
+  u8 any_slow = 0;
+
+  for (int n = 0; n < 4; n++)
+    {
+      if (prefetch_buffer_stride)
+        {
+          clib_prefetch_load (ahead[n]);
+          clib_prefetch_load (ahead[n]->data);
+        }
+
+      any_slow |=
+        sfdp_calc_key_v6 (b[n], b[n]->flow_id, k + n, lookup_val + n, h + n,
+                          l4_hdr_offset + n, slowpath);
+    }
+  return any_slow;
+}
+
+/* Forward declarations of the slow-path key preparation wrappers; both are
+ * defined further down in this file. */
+static_always_inline void
+sfdp_prepare_all_keys_v4_slow (vlib_buffer_t **b, sfdp_session_ip4_key_t *k,
+                              u64 *lv, u64 *h, i16 *l4_hdr_offset,
+                              u32 n_left);
+
+static_always_inline void
+sfdp_prepare_all_keys_v6_slow (vlib_buffer_t **b, sfdp_session_ip6_key_t *k,
+                              u64 *lv, u64 *h, i16 *l4_hdr_offset,
+                              u32 n_left);
+
+/* Compute keys, lookup values, hashes and L4 offsets for n_left IPv4 buffers.
+ *
+ * With slowpath == 0 the walk stops as soon as a buffer (or a group of four
+ * containing such a buffer) reports that it needs the slow path, and the
+ * number of buffers left from the start of that group / that buffer is
+ * returned; the caller is expected to redo those with slowpath == 1.
+ * With slowpath == 1 every buffer is processed and 0 is returned. */
+static_always_inline uword
+sfdp_prepare_all_keys_v4 (vlib_buffer_t **b, sfdp_session_ip4_key_t *k,
+                          u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left,
+                          u8 slowpath)
+{
+  u32 done = 0;
+
+  /* groups of four, prefetching the four buffers that follow the group */
+  for (; n_left - done >= 8; done += 4)
+    {
+      u8 needs_sp =
+        sfdp_lookup_four_v4 (b + done, k + done, lv + done, h + done,
+                             l4_hdr_offset + done, 4, slowpath);
+      if (needs_sp && !slowpath)
+        return n_left - done;
+    }
+
+  /* last full group of four: nothing guaranteed to follow, no prefetch */
+  if (n_left - done >= 4)
+    {
+      u8 needs_sp =
+        sfdp_lookup_four_v4 (b + done, k + done, lv + done, h + done,
+                             l4_hdr_offset + done, 0, slowpath);
+      if (needs_sp && !slowpath)
+        return n_left - done;
+      done += 4;
+    }
+
+  /* remaining buffers, one at a time */
+  for (; done < n_left; done++)
+    {
+      u8 needs_sp =
+        sfdp_calc_key_v4 (b[done], b[done]->flow_id, k + done, lv + done,
+                          h + done, l4_hdr_offset + done, slowpath);
+      if (needs_sp && !slowpath)
+        return n_left - done;
+    }
+  return 0;
+}
+
+/* Compute keys, lookup values, hashes and L4 offsets for n_left IPv6 buffers.
+ *
+ * With slowpath == 0 the walk stops as soon as a buffer (or a group of four
+ * containing such a buffer) reports that it needs the slow path, and the
+ * number of buffers left from the start of that group / that buffer is
+ * returned; the caller is expected to redo those with slowpath == 1.
+ * With slowpath == 1 every buffer is processed and 0 is returned. */
+static_always_inline uword
+sfdp_prepare_all_keys_v6 (vlib_buffer_t **b, sfdp_session_ip6_key_t *k,
+                          u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left,
+                          u8 slowpath)
+{
+  u32 done = 0;
+
+  /* groups of four, prefetching the four buffers that follow the group */
+  for (; n_left - done >= 8; done += 4)
+    {
+      u8 needs_sp =
+        sfdp_lookup_four_v6 (b + done, k + done, lv + done, h + done,
+                             l4_hdr_offset + done, 4, slowpath);
+      if (needs_sp && !slowpath)
+        return n_left - done;
+    }
+
+  /* last full group of four: nothing guaranteed to follow, no prefetch */
+  if (n_left - done >= 4)
+    {
+      u8 needs_sp =
+        sfdp_lookup_four_v6 (b + done, k + done, lv + done, h + done,
+                             l4_hdr_offset + done, 0, slowpath);
+      if (needs_sp && !slowpath)
+        return n_left - done;
+      done += 4;
+    }
+
+  /* remaining buffers, one at a time */
+  for (; done < n_left; done++)
+    {
+      u8 needs_sp =
+        sfdp_calc_key_v6 (b[done], b[done]->flow_id, k + done, lv + done,
+                          h + done, l4_hdr_offset + done, slowpath);
+      if (needs_sp && !slowpath)
+        return n_left - done;
+    }
+  return 0;
+}
+
+/* Slow-path IPv4 key preparation for n_left buffers. */
+static_always_inline void
+sfdp_prepare_all_keys_v4_slow (vlib_buffer_t **b, sfdp_session_ip4_key_t *k,
+                               u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left)
+{
+  /* with slowpath set the helper never stops early, so the remaining count
+   * it returns carries no information */
+  (void) sfdp_prepare_all_keys_v4 (b, k, lv, h, l4_hdr_offset, n_left, 1);
+}
+/* Fast-path IPv4 key preparation.  Returns 0 when all n_left buffers were
+ * handled, otherwise the number of buffers that remain to be redone through
+ * the slow path. */
+static_always_inline uword
+sfdp_prepare_all_keys_v4_fast (vlib_buffer_t **b, sfdp_session_ip4_key_t *k,
+                               u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left)
+{
+  uword n_remaining =
+    sfdp_prepare_all_keys_v4 (b, k, lv, h, l4_hdr_offset, n_left, 0);
+  return n_remaining;
+}
+
+/* Slow-path IPv6 key preparation for n_left buffers. */
+static_always_inline void
+sfdp_prepare_all_keys_v6_slow (vlib_buffer_t **b, sfdp_session_ip6_key_t *k,
+                               u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left)
+{
+  /* with slowpath set the helper never stops early, so the remaining count
+   * it returns carries no information */
+  (void) sfdp_prepare_all_keys_v6 (b, k, lv, h, l4_hdr_offset, n_left, 1);
+}
+
+/* Fast-path IPv6 key preparation.  Returns 0 when all n_left buffers were
+ * handled, otherwise the number of buffers that remain to be redone through
+ * the slow path. */
+static_always_inline uword
+sfdp_prepare_all_keys_v6_fast (vlib_buffer_t **b, sfdp_session_ip6_key_t *k,
+                               u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left)
+{
+  uword n_remaining =
+    sfdp_prepare_all_keys_v6 (b, k, lv, h, l4_hdr_offset, n_left, 0);
+  return n_remaining;
+}
+
+static_always_inline uword
+sfdp_lookup_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                   vlib_frame_t *frame, u8 is_ipv6)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 thread_index = vm->thread_index;
+  sfdp_per_thread_data_t *ptd =
+    vec_elt_at_index (sfdp->per_thread_data, thread_index);
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  sfdp_bihash_kv46_t kv = {};
+  sfdp_tenant_t *tenant;
+  sfdp_session_t *session;
+  u32 session_index;
+  u32 *bi, *from = vlib_frame_vector_args (frame);
+  u32 n_left = frame->n_vectors;
+  u32 to_local[VLIB_FRAME_SIZE], n_local = 0;
+  u32 to_remote[VLIB_FRAME_SIZE], n_remote = 0;
+  u32 to_sp[VLIB_FRAME_SIZE], n_to_sp = 0;
+  u16 thread_indices[VLIB_FRAME_SIZE];
+  u16 local_next_indices[VLIB_FRAME_SIZE];
+  u32 sp_indices[VLIB_FRAME_SIZE];
+  u32 sp_node_indices[VLIB_FRAME_SIZE];
+  vlib_buffer_t *local_bufs[VLIB_FRAME_SIZE];
+  vlib_buffer_t *to_sp_bufs[VLIB_FRAME_SIZE];
+  u32 local_flow_indices[VLIB_FRAME_SIZE];
+  u32 created_session_indices[VLIB_FRAME_SIZE], n_created = 0;
+  SFDP_SESSION_IP46_KEYS_TYPE (VLIB_FRAME_SIZE) keys;
+
+  sfdp_session_ip4_key_t *k4 = keys.keys4;
+  sfdp_session_ip6_key_t *k6 = keys.keys6;
+
+  u64 hashes[VLIB_FRAME_SIZE], *h = hashes;
+  u32 lengths[VLIB_FRAME_SIZE], *len = lengths;
+  i16 l4_hdr_off[VLIB_FRAME_SIZE], *l4o = l4_hdr_off;
+  f64 time_now = vlib_time_now (vm);
+  /* lookup_vals contains:
+   * - (Phase 1) to_slow_path_node (1bit)
+                 ||| slow_path_node_index (31bits)
+   *              ||| zeros(31bits)
+   *              |||
+   *              ||| packet_dir (1bit)
+   *
+   * - (Phase 2) session_version + thread_index + flow_index . Cf. sfdp.h
+      OR same as Phase 1 if slow path
+      ASSUMPTION: thread index < 2^31 */
+  u64 __attribute__ ((aligned (32))) lookup_vals[VLIB_FRAME_SIZE],
+    *lv = lookup_vals;
+  __clib_unused u16 hit_count = 0;
+  uword n_left_slow_keys;
+  sfdp_lookup_node_runtime_data_t *rt = (void *) node->runtime_data;
+  u32 scope_index = rt->scope_index;
+  u32 fqi =
+    vec_elt_at_index (sfdp->frame_queue_index_per_scope, scope_index)[0];
+
+  vlib_get_buffers (vm, from, bufs, n_left);
+  b = bufs;
+
+  if (is_ipv6)
+    {
+      if (PREDICT_FALSE ((n_left_slow_keys = sfdp_prepare_all_keys_v6_fast (
+                           b, k6, lv, h, l4o, n_left))))
+       {
+         uword n_done = n_left - n_left_slow_keys;
+         sfdp_prepare_all_keys_v6_slow (b + n_done, k6 + n_done, lv + n_done,
+                                        h + n_done, l4o + n_done,
+                                        n_left_slow_keys);
+       }
+    }
+  else
+    {
+      if (PREDICT_FALSE ((n_left_slow_keys = sfdp_prepare_all_keys_v4_fast (
+                           b, k4, lv, h, l4o, n_left))))
+       {
+         uword n_done = n_left - n_left_slow_keys;
+         sfdp_prepare_all_keys_v4_slow (b + n_done, k4 + n_done, lv + n_done,
+                                        h + n_done, l4o + n_done,
+                                        n_left_slow_keys);
+       }
+    }
+
+  if (is_ipv6)
+    while (n_left)
+      {
+       if (PREDICT_TRUE (n_left > 8))
+         clib_bihash_prefetch_bucket_48_8 (&sfdp->table6, h[8]);
+
+       if (PREDICT_TRUE (n_left > 1))
+         vlib_prefetch_buffer_header (b[1], STORE);
+
+       if (PREDICT_FALSE (lv[0] & SFDP_LV_TO_SP))
+         goto next_pkt6;
+
+       clib_memcpy_fast (&kv.kv6.key, k6, 48);
+       if (clib_bihash_search_inline_with_hash_48_8 (&sfdp->table6, h[0],
+                                                     &kv.kv6))
+         {
+           u16 tenant_idx = sfdp_buffer (b[0])->tenant_index;
+           int rv;
+           tenant = sfdp_tenant_at_index (sfdp, tenant_idx);
+           rv = sfdp_create_session_v6 (sfdp, ptd, tenant, tenant_idx,
+                                        thread_index, time_now, k6, h, lv,
+                                        scope_index);
+           if (PREDICT_FALSE (rv == 1))
+             {
+               vlib_node_increment_counter (
+                 vm, node->node_index, SFDP_LOOKUP_ERROR_TABLE_OVERFLOW, 1);
+               lv[0] =
+                 (u64) SFDP_SP_NODE_IP6_TABLE_OVERFLOW << 32 | SFDP_LV_TO_SP;
+               goto next_pkt6;
+             }
+           else if (rv == 2)
+             {
+               vlib_node_increment_counter (vm, node->node_index,
+                                            SFDP_LOOKUP_ERROR_COLLISION, 1);
+               continue; /* if there is colision, we just reiterate */
+             }
+           created_session_indices[n_created] =
+             sfdp_session_index_from_lookup (lv[0]);
+           n_created++;
+         }
+       else
+         {
+           lv[0] ^= kv.kv6.value;
+           hit_count++;
+         }
+
+       b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]);
+
+      next_pkt6:
+       b[0]->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+       vnet_buffer (b[0])->l4_hdr_offset = l4o[0];
+       len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+
+       b += 1;
+       n_left -= 1;
+       k6 += 1;
+       h += 1;
+       lv += 1;
+       len += 1;
+      }
+  else
+    while (n_left)
+      {
+       if (PREDICT_TRUE (n_left > 8))
+         clib_bihash_prefetch_bucket_24_8 (&sfdp->table4, h[8]);
+
+       if (PREDICT_TRUE (n_left > 1))
+         vlib_prefetch_buffer_header (b[1], STORE);
+
+       if (PREDICT_FALSE (lv[0] & SFDP_LV_TO_SP))
+         goto next_pkt4;
+
+       clib_memcpy_fast (&kv.kv4.key, k4, 24);
+       if (clib_bihash_search_inline_with_hash_24_8 (&sfdp->table4, h[0],
+                                                     &kv.kv4))
+         {
+           u16 tenant_idx = sfdp_buffer (b[0])->tenant_index;
+           int rv;
+           tenant = sfdp_tenant_at_index (sfdp, tenant_idx);
+           rv = sfdp_create_session_v4 (sfdp, ptd, tenant, tenant_idx,
+                                        thread_index, time_now, k4, h, lv,
+                                        scope_index);
+           if (PREDICT_FALSE (rv == 1))
+             {
+               vlib_node_increment_counter (
+                 vm, node->node_index, SFDP_LOOKUP_ERROR_TABLE_OVERFLOW, 1);
+               lv[0] =
+                 (u64) SFDP_SP_NODE_IP4_TABLE_OVERFLOW << 32 | SFDP_LV_TO_SP;
+               goto next_pkt4;
+             }
+           else if (rv == 2)
+             {
+               vlib_node_increment_counter (vm, node->node_index,
+                                            SFDP_LOOKUP_ERROR_COLLISION, 1);
+               continue; /* if there is colision, we just reiterate */
+             }
+           created_session_indices[n_created] =
+             sfdp_session_index_from_lookup (lv[0]);
+           n_created++;
+         }
+       else
+         {
+           lv[0] ^= kv.kv4.value;
+           hit_count++;
+         }
+
+       b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]);
+
+      next_pkt4:
+       b[0]->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+       vnet_buffer (b[0])->l4_hdr_offset = l4o[0];
+       len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+
+       b += 1;
+       n_left -= 1;
+       k4 += 1;
+       h += 1;
+       lv += 1;
+       len += 1;
+       l4o += 1;
+      }
+
+  // Notify created sessions
+  if (n_created)
+    {
+      sfdp_notify_new_sessions (sfdp, created_session_indices, n_created);
+    }
+
+  n_left = frame->n_vectors;
+  lv = lookup_vals;
+  b = bufs;
+  bi = from;
+  len = lengths;
+  while (n_left)
+    {
+      u16 flow_thread_index;
+      u32 flow_index;
+      session_version_t session_version;
+      vlib_combined_counter_main_t *vcm;
+
+      if (lv[0] & SFDP_LV_TO_SP)
+       {
+         to_sp[n_to_sp] = bi[0];
+         sp_indices[n_to_sp] = (lv[0] & ~(SFDP_LV_TO_SP)) >> 32;
+         to_sp_bufs[n_to_sp] = b[0];
+         n_to_sp++;
+         goto next_packet2;
+       }
+
+      flow_thread_index = sfdp_thread_index_from_lookup (lv[0]);
+      flow_index = sfdp_pseudo_flow_index_from_lookup (lv[0]);
+      session_index = flow_index >> 1;
+      vcm = &sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP];
+      session_version = sfdp_session_version_from_lookup (lv[0]);
+      vlib_increment_combined_counter (vcm, thread_index, flow_index, 1,
+                                      len[0]);
+      if (PREDICT_FALSE (flow_thread_index == SFDP_UNBOUND_THREAD_INDEX))
+       {
+         flow_thread_index = thread_index;
+         sfdp_session_bind_to_thread (session_index, &flow_thread_index, 1);
+         /* flow_thread_index now necessarily contains the actual thread index
+          * of the session */
+       }
+      if (flow_thread_index == thread_index)
+       {
+         /* known flow which belongs to this thread */
+         to_local[n_local] = bi[0];
+         local_flow_indices[n_local] = flow_index;
+         local_bufs[n_local] = b[0];
+         n_local++;
+       }
+      else
+       {
+         /* known flow which belongs to remote thread */
+         to_remote[n_remote] = bi[0];
+         thread_indices[n_remote] = flow_thread_index;
+         /* Store the current session version in buffer to check if it's still
+          * valid after handoff */
+         sfdp_buffer (b[0])->session_version_before_handoff = session_version;
+         n_remote++;
+       }
+    next_packet2:
+      n_left -= 1;
+      lv += 1;
+      b += 1;
+      bi += 1;
+      len += 1;
+    }
+
+  /* handover buffers to remote node */
+  if (n_remote)
+    {
+      u32 n_remote_enq;
+      n_remote_enq = vlib_buffer_enqueue_to_thread (
+       vm, node, fqi, to_remote, thread_indices, n_remote, 1);
+      vlib_node_increment_counter (vm, node->node_index,
+                                  SFDP_LOOKUP_ERROR_REMOTE, n_remote_enq);
+      vlib_node_increment_counter (vm, node->node_index,
+                                  SFDP_LOOKUP_ERROR_CON_DROP,
+                                  n_remote - n_remote_enq);
+    }
+
+  /* enqueue local */
+  if (n_local)
+    {
+      u16 *current_next = local_next_indices;
+      u32 *local_flow_index = local_flow_indices;
+      uword session_scope_index;
+      b = local_bufs;
+      n_left = n_local;
+
+      /* TODO: prefetch session and buffer + 4 loop */
+      while (n_left)
+       {
+         session_index = local_flow_index[0] >> 1;
+         session = sfdp_session_at_index (session_index);
+         session_scope_index = session->scope_index;
+         if (PREDICT_TRUE (session_scope_index == scope_index))
+           {
+             sfdp_bitmap_t pbmp =
+               session->bitmaps[sfdp_direction_from_flow_index (
+                 local_flow_index[0])];
+             sfdp_buffer (b[0])->service_bitmap = pbmp;
+
+             /* The tenant of the buffer is the tenant of the session */
+             sfdp_buffer (b[0])->tenant_index = session->tenant_idx;
+
+             sfdp_next (b[0], current_next);
+           }
+         else
+           current_next[0] =
+             SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (session_scope_index);
+
+         local_flow_index += 1;
+         current_next += 1;
+         b += 1;
+         n_left -= 1;
+       }
+      vlib_buffer_enqueue_to_next (vm, node, to_local, local_next_indices,
+                                  n_local);
+      vlib_node_increment_counter (vm, node->node_index,
+                                  SFDP_LOOKUP_ERROR_LOCAL, n_local);
+    }
+
+  if (n_to_sp)
+    {
+      vlib_frame_t *f = NULL;
+      u32 *current_next_slot = NULL;
+      u32 current_left_to_next = 0;
+      u32 *current_to_sp = to_sp;
+      u32 *sp_index = sp_indices;
+      u32 *sp_node_index = sp_node_indices;
+      u32 last_node_index = VLIB_INVALID_NODE_INDEX;
+
+      b = to_sp_bufs;
+      n_left = n_to_sp;
+
+      while (n_left)
+       {
+         u32 node_index;
+         u16 tenant_idx;
+         sfdp_tenant_t *tenant;
+
+         tenant_idx = sfdp_buffer (b[0])->tenant_index;
+         tenant = sfdp_tenant_at_index (sfdp, tenant_idx);
+         node_index = tenant->sp_node_indices[sp_index[0]];
+         sp_node_index[0] = node_index;
+
+         if (PREDICT_FALSE (node_index != last_node_index) ||
+             current_left_to_next == 0)
+           {
+             if (f != NULL)
+               vlib_put_frame_to_node (vm, last_node_index, f);
+             f = vlib_get_frame_to_node (vm, node_index);
+             f->frame_flags |= node->flags & VLIB_NODE_FLAG_TRACE;
+             current_next_slot = vlib_frame_vector_args (f);
+             current_left_to_next = VLIB_FRAME_SIZE;
+             last_node_index = node_index;
+           }
+
+         current_next_slot[0] = current_to_sp[0];
+
+         f->n_vectors += 1;
+         current_to_sp += 1;
+         b += 1;
+         sp_index += 1;
+         sp_node_index += 1;
+         current_next_slot += 1;
+
+         current_left_to_next -= 1;
+         n_left -= 1;
+       }
+      vlib_put_frame_to_node (vm, last_node_index, f);
+    }
+
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      int i;
+      b = bufs;
+      bi = from;
+      h = hashes;
+      u32 *in_local = to_local;
+      u32 *in_remote = to_remote;
+      u32 *in_sp = to_sp;
+      for (i = 0; i < frame->n_vectors; i++)
+       {
+         if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+           {
+             sfdp_lookup_trace_t *t =
+               vlib_add_trace (vm, node, b[0], sizeof (*t));
+             t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+             t->flow_id = b[0]->flow_id;
+             t->hash = h[0];
+             t->is_sp = 0;
+             if (bi[0] == in_local[0])
+               {
+                 t->next_index = local_next_indices[(in_local++) - to_local];
+               }
+             else if (bi[0] == in_remote[0])
+               {
+                 t->next_index = ~0;
+                 in_remote++;
+               }
+             else
+               {
+                 t->is_sp = 1;
+                 t->sp_index = sp_indices[in_sp - to_sp];
+                 t->sp_node_index = sp_node_indices[in_sp - to_sp];
+                 in_sp++;
+               }
+
+             if ((t->is_ip6 = is_ipv6))
+               clib_memcpy (&t->k6, &keys.keys6[i], sizeof (t->k6));
+             else
+               clib_memcpy (&t->k4, &keys.keys4[i], sizeof (t->k4));
+
+             bi++;
+             b++;
+             h++;
+           }
+         else
+           break;
+       }
+    }
+  return frame->n_vectors;
+}
+
+/* IPv4 lookup node entry point: runs the shared lookup routine with the
+ * address-family selector cleared. */
+VLIB_NODE_FN (sfdp_lookup_ip4_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  uword n_processed;
+
+  n_processed = sfdp_lookup_inline (vm, node, frame, 0 /* is_ipv6 */);
+  return n_processed;
+}
+
+/* IPv6 lookup node entry point: runs the shared lookup routine with the
+ * address-family selector set. */
+VLIB_NODE_FN (sfdp_lookup_ip6_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  uword n_processed;
+
+  n_processed = sfdp_lookup_inline (vm, node, frame, 1 /* is_ipv6 */);
+  return n_processed;
+}
+
+/* Handoff node.
+ *
+ * For every buffer of the frame, the session is derived from the buffer's
+ * flow_id and re-validated: it must still exist and its session_version must
+ * equal the session_version_before_handoff stored in the buffer (the lookup
+ * code stores that value before enqueueing a buffer to another thread).
+ *  - valid session, same scope as this node: the session's service bitmap
+ *    for the packet direction is copied into the buffer and sfdp_next()
+ *    selects the next node;
+ *  - valid session, other scope: the buffer goes to the next index for that
+ *    scope;
+ *  - otherwise the buffer is freed and SFDP_HANDOFF_ERROR_SESS_DROP is
+ *    incremented.
+ *
+ * Returns the number of vectors in the frame.
+ */
+VLIB_NODE_FN (sfdp_handoff_node)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  u32 *from = vlib_frame_vector_args (frame), *bi = from;
+  u32 n_left = frame->n_vectors;
+  u16 next_indices[VLIB_FRAME_SIZE], *current_next;
+  u32 next_buffers[VLIB_FRAME_SIZE], *next_buffer = next_buffers;
+  u32 drop_buffers[VLIB_FRAME_SIZE], *drop_buffer = drop_buffers;
+  size_t n_next = 0, n_drop = 0;
+  sfdp_lookup_node_runtime_data_t *rt = (void *) node->runtime_data;
+  u32 scope_index = rt->scope_index;
+
+  vlib_get_buffers (vm, from, bufs, n_left);
+  b = bufs;
+  current_next = next_indices;
+
+  /*TODO: prefetch, quad or octo loop...*/
+  while (n_left)
+    {
+      u32 flow_index = b[0]->flow_id;
+      u32 session_index = flow_index >> 1;
+
+      // Get session if valid and if session_version didn't change
+      sfdp_session_t *session = sfdp_session_at_index_if_valid (session_index);
+      if (PREDICT_TRUE (session &&
+                        session->session_version ==
+                          sfdp_buffer (b[0])->session_version_before_handoff))
+        {
+          u32 session_scope_index = session->scope_index;
+          if (PREDICT_TRUE (scope_index == session_scope_index))
+            {
+              sfdp_bitmap_t pbmp =
+                session->bitmaps[sfdp_direction_from_flow_index (flow_index)];
+              sfdp_buffer (b[0])->service_bitmap = pbmp;
+              sfdp_next (b[0], current_next);
+            }
+          else
+            current_next[0] =
+              SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (session_scope_index);
+
+          *next_buffer = *bi;
+          current_next += 1;
+          next_buffer += 1;
+          n_next += 1;
+        }
+      else
+        {
+          // drop if session doesn't exist anymore
+          *drop_buffer = *bi;
+          drop_buffer += 1;
+          n_drop++;
+        }
+
+      b += 1;
+      bi += 1;
+      n_left -= 1;
+    }
+
+  /* Trace BEFORE enqueueing/freeing: afterwards the dropped buffers no longer
+   * belong to this node. next_buffers[]/next_indices[] only hold the
+   * forwarded packets (compacted, in frame order), so each frame entry is
+   * matched against the next forwarded buffer index instead of indexing
+   * next_indices[] by frame position. */
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      u32 i;
+      size_t next_pos = 0;
+      b = bufs;
+      bi = from;
+      for (i = 0; i < frame->n_vectors; i++)
+        {
+          sfdp_handoff_trace_t *t;
+
+          /* Same policy as before: stop at the first untraced buffer */
+          if (!(b[0]->flags & VLIB_BUFFER_IS_TRACED))
+            break;
+
+          t = vlib_add_trace (vm, node, b[0], sizeof (*t));
+          t->flow_id = b[0]->flow_id;
+          if (next_pos < n_next && next_buffers[next_pos] == bi[0])
+            {
+              t->next_index = next_indices[next_pos];
+              next_pos++;
+            }
+          else
+            t->next_index = ~0; /* dropped: no next node */
+
+          b++;
+          bi++;
+        }
+    }
+
+  vlib_buffer_enqueue_to_next (vm, node, next_buffers, next_indices, n_next);
+  vlib_buffer_free (vm, drop_buffers, n_drop);
+  vlib_node_increment_counter (vm, node->node_index,
+                               SFDP_HANDOFF_ERROR_SESS_DROP, n_drop);
+  return frame->n_vectors;
+}
+
+/* Trace formatter for the sfdp lookup nodes.
+ * Expects (vlib_main_t *, vlib_node_t *, sfdp_lookup_trace_t *) in the
+ * va_list. Slow-path records print the slow-path reason and node name;
+ * other records print next index, hash and flow/session information.
+ * Both variants end with the lookup key dumped as hex bytes. */
+static u8 *
+format_sfdp_lookup_trace (u8 *s, va_list *args)
+{
+  vlib_main_t *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *);
+  sfdp_lookup_trace_t *t = va_arg (*args, sfdp_lookup_trace_t *);
+  /* The record holds either the IPv6 or the IPv4 key, selected by is_ip6 */
+  u8 *key_bytes = t->is_ip6 ? (u8 *) &t->k6 : (u8 *) &t->k4;
+
+  if (t->is_sp)
+    return format (s,
+                   "sfdp-lookup: sw_if_index %d, slow-path (%U) "
+                   "slow-path node %U key 0x%U",
+                   t->sw_if_index, format_sfdp_sp_node, t->sp_index,
+                   format_vlib_node_name, vm, t->sp_node_index,
+                   format_hex_bytes_no_wrap, key_bytes,
+                   t->is_ip6 ? sizeof (t->k6) : sizeof (t->k4));
+
+  /* Bit 0 of flow_id is the direction, the remaining bits the session */
+  return format (s,
+                 "sfdp-lookup: sw_if_index %d, next index %d hash 0x%x "
+                 "flow-id %u (session %u, %s) key 0x%U",
+                 t->sw_if_index, t->next_index, t->hash, t->flow_id,
+                 t->flow_id >> 1, t->flow_id & 0x1 ? "reverse" : "forward",
+                 format_hex_bytes_no_wrap, key_bytes,
+                 t->is_ip6 ? sizeof (t->k6) : sizeof (t->k4));
+}
+
+/* Trace formatter for the sfdp-handoff node.
+ * Expects (vlib_main_t *, vlib_node_t *, sfdp_handoff_trace_t *) in the
+ * va_list; the first two are consumed but unused. */
+static u8 *
+format_sfdp_handoff_trace (u8 *s, va_list *args)
+{
+  vlib_main_t __clib_unused *vm = va_arg (*args, vlib_main_t *);
+  vlib_node_t __clib_unused *node = va_arg (*args, vlib_node_t *);
+  sfdp_handoff_trace_t *t = va_arg (*args, sfdp_handoff_trace_t *);
+  /* Bit 0 of flow_id selects the direction label */
+  const char *direction = t->flow_id & 0x1 ? "reverse" : "forward";
+
+  return format (s,
+                 "sfdp-handoff: next index %d "
+                 "flow-id %u (session %u, %s)",
+                 t->next_index, t->flow_id, t->flow_id >> 1, direction);
+}
+
+/* Default per-node runtime data (scope_index 0) referenced by the lookup and
+ * handoff node registrations that follow. */
+static sfdp_lookup_node_runtime_data_t lookup_rt_data_default = {
+  .scope_index = 0
+};
+
+/* Registration of the IPv4 lookup node ("sfdp-lookup-ip4"): internal node
+ * taking u32 vector elements, with lazily resolved next nodes, the default
+ * lookup runtime data and the lookup error strings. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip4_node) = {
+  .name = "sfdp-lookup-ip4",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES,
+  .runtime_data = &lookup_rt_data_default,
+  .runtime_data_bytes = sizeof (lookup_rt_data_default),
+  .n_errors = ARRAY_LEN (sfdp_lookup_error_strings),
+  .error_strings = sfdp_lookup_error_strings,
+};
+
+/* Registration of the IPv6 lookup node ("sfdp-lookup-ip6"): same settings as
+ * the IPv4 lookup node registration, only the node name differs. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip6_node) = {
+  .name = "sfdp-lookup-ip6",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES,
+  .runtime_data = &lookup_rt_data_default,
+  .runtime_data_bytes = sizeof (lookup_rt_data_default),
+  .n_errors = ARRAY_LEN (sfdp_lookup_error_strings),
+  .error_strings = sfdp_lookup_error_strings,
+};
+
+/* Registration of the handoff node ("sfdp-handoff"). Unlike the lookup nodes
+ * it declares its errors through error_counters rather than error_strings;
+ * it shares the lookup nodes' default runtime data layout (scope_index). */
+VLIB_REGISTER_NODE (sfdp_handoff_node) = {
+  .name = "sfdp-handoff",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_handoff_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES,
+  .n_errors = ARRAY_LEN (sfdp_handoff_error_counters),
+  .error_counters = sfdp_handoff_error_counters,
+  .runtime_data = &lookup_rt_data_default,
+  .runtime_data_bytes = sizeof (lookup_rt_data_default),
+};
diff --git a/src/vnet/sfdp/lookup/parser.c b/src/vnet/sfdp/lookup/parser.c
new file mode 100644 (file)
index 0000000..8dc8edb
--- /dev/null
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/lookup_inlines.h>
+#include <vnet/sfdp/lookup/parser.h>
+#include <vnet/sfdp/lookup/parser_inlines.h>
+/* Instantiate the runtime data of one registered parser.
+ *
+ * Allocates and initialises the parser's bihash table through the bihash
+ * function table selected by the registration's key size, allocates one key
+ * array (VLIB_FRAME_SIZE keys) and one key/value scratch buffer (key plus 8
+ * bytes) per entry of the per-thread vectors, then appends the parser to
+ * pm->parsers.
+ *
+ * Returns the index of the new entry in pm->parsers. */
+static uword
+sfdp_create_parser (sfdp_parser_main_t *pm,
+                    sfdp_parser_registration_mutable_t *reg)
+{
+  const sfdp_parser_bihash_registration_t *bihash_ops =
+    &sfdp_parser_bihash_regs[reg->key_size];
+  const uword new_index = vec_len (pm->parsers);
+  const uword last_thread = vlib_num_workers ();
+  sfdp_parser_data_t parser = { 0 };
+  uword ti;
+
+  /* Static description copied from the registration */
+  parser.name = reg->name;
+  parser.key_size = reg->key_size;
+  parser.format_fn = reg->format_fn;
+  parser.normalize_key_fn = reg->normalize_key_fn;
+
+  /* Zeroed, cache-line aligned bihash table */
+  parser.bihash_table =
+    clib_mem_alloc_aligned (bihash_ops->table_size, CLIB_CACHE_LINE_BYTES);
+  clib_memset (parser.bihash_table, 0, bihash_ops->table_size);
+  bihash_ops->sfdp_parser_bihash_init_fn (parser.bihash_table, reg->name,
+                                          sfdp_ip4_num_buckets (),
+                                          sfdp_ip4_mem_size ());
+
+  /* Per-thread scratch areas, indices 0..vlib_num_workers() inclusive */
+  vec_validate (parser.keys_ptd, last_thread);
+  vec_validate (parser.kv_ptd, last_thread);
+  for (ti = 0; ti < vec_len (parser.keys_ptd); ti++)
+    parser.keys_ptd[ti] = clib_mem_alloc_aligned (
+      reg->key_size * VLIB_FRAME_SIZE, CLIB_CACHE_LINE_BYTES);
+  for (ti = 0; ti < vec_len (parser.kv_ptd); ti++)
+    parser.kv_ptd[ti] =
+      clib_mem_alloc_aligned (reg->key_size + 8, CLIB_CACHE_LINE_BYTES);
+
+  vec_add1 (pm->parsers, parser);
+  return new_index;
+}
+
+/* Init function of the parser subsystem.
+ *
+ * Makes sure sfdp_init has run first, then walks the linked list of parser
+ * registrations (pm->regs), creates the runtime data of each parser and
+ * records its index back into the registration.
+ *
+ * Returns 0 on success, or the error reported by sfdp_init. */
+static clib_error_t *
+sfdp_parser_init (vlib_main_t *vm)
+{
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+  sfdp_parser_registration_mutable_t *current_reg;
+  clib_error_t *error;
+
+  /* Do not create parsers on top of a failed sfdp initialisation */
+  if ((error = vlib_call_init_function (vm, sfdp_init)))
+    return error;
+
+  for (current_reg = pm->regs; current_reg; current_reg = current_reg->next)
+    current_reg->sfdp_parser_data_index =
+      sfdp_create_parser (pm, current_reg);
+
+  return 0;
+}
+
+/* The global parser state is defined only when this file is not compiled as
+ * a CLIB_MARCH_VARIANT build, so that a single definition exists. */
+#ifndef CLIB_MARCH_VARIANT
+sfdp_parser_main_t sfdp_parser_main;
+#endif
+
+/* Register sfdp_parser_init as a vlib init function */
+VLIB_INIT_FUNCTION (sfdp_parser_init);
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/parser.h b/src/vnet/sfdp/lookup/parser.h
new file mode 100644 (file)
index 0000000..832a377
--- /dev/null
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_parser_h__
+#define __included_lookup_parser_h__
+#include <vlib/vlib.h>
+#include <vppinfra/cpu.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/sfdp.h>
+
+/* Upper bound on parser key size, in bytes (presumably; TODO confirm where it
+ * is enforced) */
+#define SFDP_PARSER_MAX_KEY_SIZE 64
+/* Per-packet key computation callback of a parser.
+ * The parser inlines call it with the buffer, the buffer's flow_id as
+ * context_id, and per-packet slots for the key, the lookup value, the hash
+ * and the L4 header offset; `slowpath` tells whether the call is made from
+ * the slow-path pass. The u8 result is OR-ed by the callers into a
+ * "slow path needed" flag. NOTE(review): the exact contents written to the
+ * output slots are defined by each implementation, not visible here. */
+typedef u8 calc_key_fn_t (vlib_buffer_t *b, u32 context_id, void *skey,
+                         u64 *lookup_val, u64 *h, i16 *l4_hdr_offset,
+                         u8 slowpath);
+
+/* Key normalisation callback; writes into `result` for key `key_idx` of
+ * `session` (semantics defined by the parser implementation). */
+typedef void normalize_key_fn_t (sfdp_session_t *session, void *result,
+                                u8 key_idx);
+
+/* Indices into the format_fn[] array of a parser registration;
+ * SFDP_PARSER_N_FORMAT_FUNCTION is the array length. */
+enum
+{
+  SFDP_PARSER_FORMAT_FUNCTION_INGRESS,
+  SFDP_PARSER_FORMAT_FUNCTION_EGRESS,
+  SFDP_PARSER_FORMAT_FUNCTION_CONTEXT,
+  SFDP_PARSER_N_FORMAT_FUNCTION
+};
+
+/* Per march parser registration structure */
+typedef struct
+{
+  /* Parser name; also used as the name of its bihash table */
+  char *name;
+  /* Per-packet key computation callback */
+  calc_key_fn_t *const calc_key_fn;
+  /* Size of one key, in bytes */
+  const uword key_size;
+  /* Byte offset inside the key of the protocol byte copied into the session
+   * at creation time */
+  const uword proto_offset;
+  /* Session type assigned to sessions created by this parser */
+  sfdp_session_type_t type;
+  /* Format functions, indexed by SFDP_PARSER_FORMAT_FUNCTION_* */
+  format_function_t *format_fn[SFDP_PARSER_N_FORMAT_FUNCTION];
+  /* Key normalisation callback */
+  normalize_key_fn_t *normalize_key_fn;
+
+} sfdp_parser_registration_t;
+
+/* Mutable companion of sfdp_parser_registration_t.
+ * One instance per registered parser is linked into sfdp_parser_main.regs by
+ * the SFDP_PARSER_REGISTER constructor, which copies key_size, name,
+ * format_fn and normalize_key_fn from the const registration. */
+typedef struct _sfdp_parser_registration_mutable_t
+{
+  /* Next registration in the singly linked list */
+  struct _sfdp_parser_registration_mutable_t *next;
+  uword key_size;
+  /* Index of the parser in sfdp_parser_main.parsers, set by the parser init
+   * function */
+  uword sfdp_parser_data_index;
+  char *name;
+  /* NOTE(review): not assigned by the registration macro in this header */
+  vlib_node_registration_t *node_reg;
+  format_function_t *const *format_fn;
+  normalize_key_fn_t *normalize_key_fn;
+} sfdp_parser_registration_mutable_t;
+
+/* Type-erased signatures of the bihash operations used by parsers. Tables
+ * and key/value pairs are passed as void pointers so one set of signatures
+ * can serve bihash instances of different key sizes. */
+
+/* Initialise a table with a name, a bucket count and a memory size */
+typedef void sfdp_parser_bihash_init_fn_t (void *bihash, char *name,
+                                          u32 nbuckets, uword memory_size);
+/* Add or delete a key/value pair; is_add selects the operation */
+typedef int sfdp_parser_bihash_add_del_fn_t (void *bihash, void *kv,
+                                            int is_add);
+/* Compute the hash of a key/value pair */
+typedef u64 sfdp_parser_bihash_hash_fn_t (void *kv);
+/* Prefetch the bucket corresponding to a hash */
+typedef void sfdp_parser_bihash_prefetch_bucket_fn_t (void *bihash, u64 hash);
+/* Search using a precomputed hash; kv_result carries the key in and the
+ * result out (presumably, as with the typed bihash search functions) */
+typedef int sfdp_parser_bihash_search_with_hash_fn_t (void *bihash, u64 hash,
+                                                     void *kv_result);
+
+/* Add/delete with precomputed hash and stale/overwrite callbacks; currently
+ * not part of the function table (see the commented-out member there) */
+typedef int sfdp_parser_bihash_add_del_with_hash_fn_t (
+  void *bihash, void *kv, u64 hash, u8 is_add, void *is_stale_cb,
+  void *is_stale_arg, void *overwrite_cb, void *overwrite_arg);
+
+/* Per march bihash vfts */
+/* Function table for one bihash flavour. The parser creation code selects
+ * the entry of sfdp_parser_bihash_regs[] indexed by the parser key size. */
+typedef struct
+{
+  sfdp_parser_bihash_init_fn_t *const sfdp_parser_bihash_init_fn;
+  sfdp_parser_bihash_add_del_fn_t *const sfdp_parser_bihash_add_del_fn;
+  sfdp_parser_bihash_hash_fn_t *const sfdp_parser_bihash_hash_fn;
+  sfdp_parser_bihash_prefetch_bucket_fn_t
+    *const sfdp_parser_bihash_prefetch_bucket_fn;
+  sfdp_parser_bihash_search_with_hash_fn_t
+    *const sfdp_parser_bihash_search_with_hash_fn;
+  /*  sfdp_parser_bihash_add_del_with_hash_fn_t *const
+   * sfdp_parser_bihash_add_del_with_hash_fn; */
+  /* Number of bytes to allocate for the table object itself */
+  uword table_size;
+} sfdp_parser_bihash_registration_t;
+
+/* Runtime state of one parser, stored in sfdp_parser_main.parsers */
+typedef struct
+{
+  /* Type-erased bihash table of this parser */
+  void *bihash_table;
+  void **keys_ptd; /* per thread vector of VLIB_FRAME_SIZE keys */
+  void **kv_ptd;   /* per thread vector of kv */
+  /* Size of one key, in bytes */
+  uword key_size;
+  char *name;
+  /* Format functions, indexed by SFDP_PARSER_FORMAT_FUNCTION_* */
+  format_function_t *const *format_fn;
+  normalize_key_fn_t *normalize_key_fn;
+} sfdp_parser_data_t;
+
+/* Global parser state */
+typedef struct
+{
+  /* Vector of parser runtime data, one element per created parser */
+  sfdp_parser_data_t *parsers;
+  /* Head of the linked list of parser registrations */
+  sfdp_parser_registration_mutable_t *regs;
+  /* NOTE(review): presumably a name -> parser index lookup structure; it is
+   * not populated by the code in this header - confirm against users */
+  uword *parser_index_per_name;
+} sfdp_parser_main_t;
+
+/* SFDP_PARSER_REGISTER(x) introduces the const registration
+ * sfdp_parser_registration_x; the user supplies its initializer right after
+ * the macro invocation.
+ *
+ * Non-march-variant build: additionally defines the mutable registration
+ * sfdp_parser_registration_mutable_x and a constructor function that copies
+ * key_size, name, format_fn and normalize_key_fn from the const registration
+ * and pushes the mutable registration at the head of sfdp_parser_main.regs.
+ */
+#ifndef CLIB_MARCH_VARIANT
+#define SFDP_PARSER_REGISTER(x)                                               \
+  static const sfdp_parser_registration_t sfdp_parser_registration_##x;       \
+  sfdp_parser_registration_mutable_t sfdp_parser_registration_mutable_##x;    \
+  static void __sfdp_parser_registration_mutable_add_registration__##x (void) \
+    __attribute__ ((__constructor__));                                        \
+  static void __sfdp_parser_registration_mutable_add_registration__##x (void) \
+  {                                                                           \
+    sfdp_parser_main_t *pm = &sfdp_parser_main;                               \
+    sfdp_parser_registration_mutable_t *r =                                   \
+      &sfdp_parser_registration_mutable_##x;                                  \
+    r->next = pm->regs;                                                       \
+    r->key_size = sfdp_parser_registration_##x.key_size;                      \
+    r->name = sfdp_parser_registration_##x.name;                              \
+    r->format_fn = sfdp_parser_registration_##x.format_fn;                    \
+    r->normalize_key_fn = sfdp_parser_registration_##x.normalize_key_fn;      \
+    pm->regs = r;                                                             \
+  }                                                                           \
+  static const sfdp_parser_registration_t sfdp_parser_registration_##x
+#else
+/* March-variant build: only declares the mutable registration as extern and
+ * introduces a (non-const) registration local to the variant. */
+#define SFDP_PARSER_REGISTER(x)                                               \
+  extern sfdp_parser_registration_mutable_t                                   \
+    sfdp_parser_registration_mutable_##x;                                     \
+  static sfdp_parser_registration_t sfdp_parser_registration_##x
+#endif
+
+/* Global parser state, defined in parser.c */
+extern sfdp_parser_main_t sfdp_parser_main;
+
+#endif /*__included_lookup_parser_h__*/
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/parser_inlines.h b/src/vnet/sfdp/lookup/parser_inlines.h
new file mode 100644 (file)
index 0000000..d6c6f87
--- /dev/null
@@ -0,0 +1,646 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_parser_inlines_h__
+#define __included_lookup_parser_inlines_h__
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/sfdp_funcs.h>
+#include <vnet/sfdp/service.h>
+#include <vnet/sfdp/lookup/parser.h>
+#include <vnet/sfdp/lookup/lookup.h>
+#include <vnet/sfdp/lookup/sfdp_bihashes.h>
+
+/* __sfdp_inline_here: statement prefix requesting that the calls in the
+ * following statement be inlined. It expands to the clang statement
+ * attribute on clang newer than 17 and to nothing otherwise.
+ * NOTE(review): the #undef presumably keeps a project-level `always_inline`
+ * macro from being expanded inside the attribute - confirm. */
+#if defined(__clang__) && __clang_major__ > 17
+#undef always_inline
+#define __sfdp_inline_here [[clang::always_inline]]
+#else
+#define __sfdp_inline_here
+#endif
+
+/* Same as SFDP_PARSER_BIHASH_CALL_FN, evaluated as a statement expression
+ * prefixed with __sfdp_inline_here */
+#define SFDP_PARSER_BIHASH_CALL_INLINE_FN(args...)                            \
+  ({ __sfdp_inline_here SFDP_PARSER_BIHASH_CALL_FN (args); })
+
+/* Trace record of a parser lookup node. The anonymous union holds either
+ * the regular lookup result or the slow-path information; is_sp presumably
+ * selects which one is valid (as in the non-parser lookup trace). */
+typedef struct
+{
+  u32 sw_if_index;
+  /* Raw key bytes; 64 is the value of SFDP_PARSER_MAX_KEY_SIZE */
+  u8 key_data[64];
+  u16 parser_index;
+  u8 is_sp;
+  union
+  {
+    /* Regular lookup result */
+    struct
+    {
+      u32 next_index;
+      u64 hash;
+      u32 flow_id;
+    };
+    /* Slow-path redirection */
+    struct
+    {
+      u32 sp_index;
+      u32 sp_node_index;
+    };
+  };
+} sfdp_parser_lookup_trace_t;
+
+/* Compute the keys of four consecutive buffers with the parser's
+ * calc_key_fn.
+ *
+ * reg                    parser registration (callback and key size)
+ * b                      first of the four buffers
+ * k                      slot of the first key; consecutive keys are
+ *                        reg->key_size bytes apart
+ * lookup_val, h,
+ * l4_hdr_offset          per-packet output slots, first of four
+ * prefetch_buffer_stride when non-zero, the buffers located that many
+ *                        entries ahead of b are prefetched (header and data)
+ * slowpath               forwarded unchanged to calc_key_fn
+ *
+ * Returns the OR of the four calc_key_fn results (non-zero when at least one
+ * packet reported that the slow path is needed).
+ */
+static_always_inline u8
+sfdp_parser_lookup_four (const sfdp_parser_registration_t *reg,
+                         vlib_buffer_t **b, void *k, u64 *lookup_val, u64 *h,
+                         i16 *l4_hdr_offset, int prefetch_buffer_stride,
+                         u8 slowpath)
+{
+  vlib_buffer_t **pb = b + prefetch_buffer_stride;
+  /* k is type-erased: arithmetic on void * advances by single bytes, so the
+   * key slots must be addressed explicitly with the parser key size */
+  const uword key_size = reg->key_size;
+  u8 *key = (u8 *) k;
+  u8 slowpath_needed = 0;
+  if (prefetch_buffer_stride)
+    {
+      clib_prefetch_load (pb[0]);
+      clib_prefetch_load (pb[0]->data);
+    }
+
+  __sfdp_inline_here slowpath_needed |=
+    reg->calc_key_fn (b[0], b[0]->flow_id, key + 0 * key_size,
+                      lookup_val + 0, h + 0, l4_hdr_offset + 0, slowpath);
+
+  if (prefetch_buffer_stride)
+    {
+      clib_prefetch_load (pb[1]);
+      clib_prefetch_load (pb[1]->data);
+    }
+
+  __sfdp_inline_here slowpath_needed |=
+    reg->calc_key_fn (b[1], b[1]->flow_id, key + 1 * key_size,
+                      lookup_val + 1, h + 1, l4_hdr_offset + 1, slowpath);
+
+  if (prefetch_buffer_stride)
+    {
+      clib_prefetch_load (pb[2]);
+      clib_prefetch_load (pb[2]->data);
+    }
+
+  __sfdp_inline_here slowpath_needed |=
+    reg->calc_key_fn (b[2], b[2]->flow_id, key + 2 * key_size,
+                      lookup_val + 2, h + 2, l4_hdr_offset + 2, slowpath);
+
+  if (prefetch_buffer_stride)
+    {
+      clib_prefetch_load (pb[3]);
+      clib_prefetch_load (pb[3]->data);
+    }
+
+  __sfdp_inline_here slowpath_needed |=
+    reg->calc_key_fn (b[3], b[3]->flow_id, key + 3 * key_size,
+                      lookup_val + 3, h + 3, l4_hdr_offset + 3, slowpath);
+  return slowpath_needed;
+}
+
+/* Compute the keys of n_left buffers, four at a time where possible.
+ *
+ * reg            parser registration (callback and key size)
+ * b              buffers to process
+ * k              first key slot. The parameter keeps its historical type,
+ *                but the storage is treated as an array of reg->key_size
+ *                byte keys, which is how the per-thread key arrays are
+ *                allocated.
+ * lv, h,
+ * l4_hdr_offset  per-packet output arrays
+ * n_left         number of buffers
+ * slowpath       0: fast pass, stop as soon as a packet requires the slow
+ *                path; non-zero: slow pass, process every packet
+ *
+ * Returns 0 when all packets were processed. In the fast pass, returns the
+ * number of packets left (counted from the start of the group in which the
+ * slow path was requested) so the caller can redo them in the slow pass.
+ */
+static_always_inline uword
+sfdp_parser_prepare_all_keys (const sfdp_parser_registration_t *reg,
+                              vlib_buffer_t **b, sfdp_session_ip4_key_t *k,
+                              u64 *lv, u64 *h, i16 *l4_hdr_offset, u32 n_left,
+                              u8 slowpath)
+{
+  /* Advance through the key storage with the parser's own key size rather
+   * than sizeof (sfdp_session_ip4_key_t) */
+  const uword key_size = reg->key_size;
+  u8 *key = (u8 *) k;
+
+  /* main loop - process 4 buffers while prefetching the next 4 */
+  while (n_left >= 8)
+    {
+      if (sfdp_parser_lookup_four (reg, b, key, lv, h, l4_hdr_offset, 4,
+                                   slowpath) &&
+          !slowpath)
+        return n_left;
+
+      b += 4;
+      key += 4 * key_size;
+      lv += 4;
+      h += 4;
+      l4_hdr_offset += 4;
+      n_left -= 4;
+    }
+
+  /* last group of 4 packets - nothing further to prefetch */
+  if (n_left >= 4)
+    {
+      if (sfdp_parser_lookup_four (reg, b, key, lv, h, l4_hdr_offset, 0,
+                                   slowpath) &&
+          !slowpath)
+        return n_left;
+
+      b += 4;
+      key += 4 * key_size;
+      lv += 4;
+      h += 4;
+      l4_hdr_offset += 4;
+      n_left -= 4;
+    }
+
+  /* remaining packets, one at a time */
+  while (n_left > 0)
+    {
+      __sfdp_inline_here if (reg->calc_key_fn (b[0], b[0]->flow_id, key,
+                                               lv + 0, h + 0,
+                                               l4_hdr_offset + 0, slowpath) &&
+                             !slowpath) return n_left;
+
+      b += 1;
+      key += key_size;
+      lv += 1;
+      h += 1;
+      l4_hdr_offset += 1;
+      n_left -= 1;
+    }
+  return 0;
+}
+
+/* Slow pass: compute the keys of all n_left buffers with the slowpath flag
+ * set. The result of the generic routine is not needed in this mode. */
+static_always_inline void
+sfdp_parser_prepare_all_keys_slow (const sfdp_parser_registration_t *reg,
+                                   vlib_buffer_t **b,
+                                   sfdp_session_ip4_key_t *k, u64 *lv, u64 *h,
+                                   i16 *l4_hdr_offset, u32 n_left)
+{
+  (void) sfdp_parser_prepare_all_keys (reg, b, k, lv, h, l4_hdr_offset,
+                                       n_left, 1 /* slowpath */);
+}
+
+/* Fast pass: compute keys with the slowpath flag cleared.
+ * Returns 0 when every buffer was handled, otherwise the number of buffers
+ * still to be processed by the slow pass. */
+static_always_inline uword
+sfdp_parser_prepare_all_keys_fast (const sfdp_parser_registration_t *reg,
+                                   vlib_buffer_t **b,
+                                   sfdp_session_ip4_key_t *k, u64 *lv, u64 *h,
+                                   i16 *l4_hdr_offset, u32 n_left)
+{
+  uword n_for_slow_pass;
+
+  n_for_slow_pass = sfdp_parser_prepare_all_keys (
+    reg, b, k, lv, h, l4_hdr_offset, n_left, 0 /* slowpath */);
+  return n_for_slow_pass;
+}
+
+/* Create a session for key `k` and insert it into the parser's bihash.
+ *
+ * On entry lookup_val[0] carries the packet direction in bit 0; on success
+ * it is XOR-ed with the value stored in the table.
+ *
+ * Returns:
+ *   0  session created and inserted
+ *   1  no session could be allocated (sfdp_alloc_session returned ~0)
+ *   2  the table insertion failed; the freshly allocated session is freed
+ */
+static_always_inline int
+sfdp_parser_create_session_inline (const sfdp_parser_registration_t *reg,
+                                  uword parser_data_index, sfdp_main_t *sfdp,
+                                  sfdp_per_thread_data_t *ptd,
+                                  sfdp_tenant_t *tenant, u16 tenant_idx,
+                                  u16 thread_index, f64 time_now, void *k,
+                                  u64 *h, u64 *lookup_val, u32 scope_index,
+                                  void *kv, const uword key_size,
+                                  void *table_bihash)
+{
+  u64 value;
+  u8 proto;
+  sfdp_session_t *session;
+  u32 session_idx;
+  u32 pseudo_flow_idx;
+
+  session_idx =
+    sfdp_alloc_session (sfdp, ptd, thread_index != SFDP_UNBOUND_THREAD_INDEX);
+
+  if (session_idx == ~0)
+    return 1;
+
+  session = pool_elt_at_index (sfdp->sessions, session_idx);
+
+  /* pseudo flow index = session index shifted left by one, direction bit of
+   * the packet in bit 0 */
+  pseudo_flow_idx = (lookup_val[0] & 0x1) | (session_idx << 1);
+  /* The table value already carries the incremented version; the session's
+   * own version field is only bumped once the insertion has succeeded */
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_idx,
+                                      session->session_version + 1);
+
+  /* Build the key/value pair in the scratch buffer: key bytes followed by
+   * the 8-byte value */
+  clib_memcpy_fast (kv, k, key_size);
+  clib_memcpy_fast (kv + key_size, &value, sizeof (value));
+  /* The protocol is the single byte at reg->proto_offset inside the key */
+  clib_memcpy_fast (&proto, k + reg->proto_offset, 1);
+  /* NOTE(review): is_add == 2 presumably requests "add, but fail if the key
+   * already exists" - confirm against the bihash implementation */
+  if (SFDP_PARSER_BIHASH_CALL_INLINE_FN (reg, sfdp_parser_bihash_add_del_fn,
+                                        table_bihash, kv, 2))
+    {
+      /* collision - remote thread created same entry */
+      sfdp_free_session (sfdp, ptd, session_idx);
+      return 2;
+    }
+  session->type = reg->type;
+  session->parser_index[SFDP_SESSION_KEY_PRIMARY] = parser_data_index;
+  session->key_flags = SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER;
+
+  // TODO: Would be nice to do this upon free instead to have avoid having to
+  // check
+  //       if the session is valid at all when checking invalidation.
+  session->session_version += 1;
+  session->tenant_idx = tenant_idx;
+  session->state = SFDP_SESSION_STATE_FSOL;
+  session->owning_thread_index = thread_index;
+  session->scope_index = scope_index;
+  /* A session id is only generated when per-thread data is available */
+  if (ptd)
+    sfdp_session_generate_and_set_id (sfdp, ptd, session);
+
+  /* The session starts with the service bitmaps of its tenant */
+  clib_memcpy_fast (session->bitmaps, tenant->bitmaps,
+                   sizeof (session->bitmaps));
+  clib_memcpy_fast (&session->keys_data[SFDP_SESSION_KEY_PRIMARY], k,
+                   key_size);
+
+  session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY] = lookup_val[0] & 0x1;
+  session->proto = proto;
+
+  lookup_val[0] ^= value;
+  /* Bidirectional counter zeroing */
+  vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+                             lookup_val[0]);
+  vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+                             lookup_val[0] | 0x1);
+  vlib_increment_simple_counter (
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_CREATED],
+    thread_index, tenant_idx, 1);
+  return 0;
+}
+
+/*
+ * Generic lookup node body shared by all parser nodes (it is instantiated by
+ * SFDP_PARSER_DEFINE_NODE).
+ *
+ * Pass 1: build the keys of the whole frame, then search the parser bihash
+ *         for every packet and create a session on a miss.
+ * Pass 2: sort every packet into one of three groups: local (session owned by
+ *         this thread), remote (handed off to the owning thread) or slow
+ *         path, and bump the per-flow lookup counter.
+ * The three groups are then enqueued and, when tracing is enabled on the
+ * node, trace records are added.
+ *
+ * vm                - vlib main of the calling thread
+ * node              - runtime of the lookup node; its runtime data is read as
+ *                     sfdp_lookup_node_runtime_data_t to get the scope index
+ * frame             - frame of buffer indices to process
+ * reg               - parser registration (key size, bihash functions, ...)
+ * parser_data_index - index of the parser in sfdp_parser_main.parsers
+ *
+ * Returns the number of buffers in the frame.
+ */
+static_always_inline uword
+sfdp_parser_lookup_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                          vlib_frame_t *frame,
+                          const sfdp_parser_registration_t *reg,
+                          uword parser_data_index)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+  u32 thread_index = vm->thread_index;
+  sfdp_per_thread_data_t *ptd =
+    vec_elt_at_index (sfdp->per_thread_data, thread_index);
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  const uword key_size = reg->key_size;
+  sfdp_parser_data_t *parser =
+    vec_elt_at_index (pm->parsers, parser_data_index);
+  void *kv = vec_elt (parser->kv_ptd, thread_index);
+  void *table_bihash = parser->bihash_table;
+  void *keys = vec_elt (parser->keys_ptd, thread_index);
+  void *key = keys;
+  sfdp_tenant_t *tenant;
+  sfdp_session_t *session;
+
+  u32 session_index;
+  u32 *bi, *from = vlib_frame_vector_args (frame);
+  u32 n_left = frame->n_vectors;
+  u32 to_local[VLIB_FRAME_SIZE], n_local = 0;
+  u32 to_remote[VLIB_FRAME_SIZE], n_remote = 0;
+  u32 to_sp[VLIB_FRAME_SIZE], n_to_sp = 0;
+  u16 thread_indices[VLIB_FRAME_SIZE];
+  u16 local_next_indices[VLIB_FRAME_SIZE];
+  u32 sp_indices[VLIB_FRAME_SIZE];
+  u32 sp_node_indices[VLIB_FRAME_SIZE];
+  vlib_buffer_t *local_bufs[VLIB_FRAME_SIZE];
+  vlib_buffer_t *to_sp_bufs[VLIB_FRAME_SIZE];
+  u32 local_flow_indices[VLIB_FRAME_SIZE];
+  u32 created_session_indices[VLIB_FRAME_SIZE], n_created = 0;
+
+  u64 hashes[VLIB_FRAME_SIZE], *h = hashes;
+  u32 lengths[VLIB_FRAME_SIZE], *len = lengths;
+  i16 l4_hdr_off[VLIB_FRAME_SIZE], *l4o = l4_hdr_off;
+  f64 time_now = vlib_time_now (vm);
+  /* lookup_vals contains:
+   * - (Phase 1) to_slow_path_node (1bit)
+                 ||| slow_path_node_index (31bits)
+   *              ||| zeros(31bits)
+   *              |||
+   *              ||| packet_dir (1bit)
+   *
+   * - (Phase 2) session_version + thread_index + flow_index . Cf. sfdp.h
+      OR same as Phase 1 if slow path
+      ASSUMPTION: thread index < 2^31 */
+  u64 __attribute__ ((aligned (32))) lookup_vals[VLIB_FRAME_SIZE],
+    *lv = lookup_vals;
+  __clib_unused u16 hit_count = 0;
+  uword n_left_slow_keys;
+  sfdp_lookup_node_runtime_data_t *rt = (void *) node->runtime_data;
+  u32 scope_index = rt->scope_index;
+  u32 fqi =
+    vec_elt_at_index (sfdp->frame_queue_index_per_scope, scope_index)[0];
+
+  vlib_get_buffers (vm, from, bufs, n_left);
+  b = bufs;
+
+  /* Build all keys; whatever the fast variant leaves unprocessed (its return
+   * value) is finished by the slow variant */
+  if (PREDICT_FALSE ((n_left_slow_keys = sfdp_parser_prepare_all_keys_fast (
+                       reg, b, keys, lv, h, l4o, n_left))))
+    {
+      uword n_done = n_left - n_left_slow_keys;
+      sfdp_parser_prepare_all_keys_slow (
+       reg, b + n_done, keys + key_size * n_done, lv + n_done, h + n_done,
+       l4o + n_done, n_left_slow_keys);
+    }
+
+  /* Pass 1: bihash search, session creation on miss */
+  while (n_left)
+    {
+      if (PREDICT_TRUE (n_left > 8))
+       SFDP_PARSER_BIHASH_CALL_INLINE_FN (
+         reg, sfdp_parser_bihash_prefetch_bucket_fn, table_bihash, h[8]);
+
+      if (PREDICT_TRUE (n_left > 1))
+       vlib_prefetch_buffer_header (b[1], STORE);
+
+      /* Key preparation already decided this packet goes to a slow path */
+      if (PREDICT_FALSE (lv[0] & SFDP_LV_TO_SP))
+       goto next_pkt;
+
+      clib_memcpy_fast (kv, key, key_size);
+      if (SFDP_PARSER_BIHASH_CALL_INLINE_FN (
+           reg, sfdp_parser_bihash_search_with_hash_fn, table_bihash, h[0],
+           kv))
+       {
+         /* Miss: try to create the session */
+         u16 tenant_idx = sfdp_buffer (b[0])->tenant_index;
+         int rv;
+         tenant = sfdp_tenant_at_index (sfdp, tenant_idx);
+         rv = sfdp_parser_create_session_inline (
+           reg, parser_data_index, sfdp, ptd, tenant, tenant_idx,
+           thread_index, time_now, key, h, lv, scope_index, kv, key_size,
+           table_bihash);
+
+         if (PREDICT_FALSE (rv == 1))
+           {
+             vlib_node_increment_counter (
+               vm, node->node_index, SFDP_LOOKUP_ERROR_TABLE_OVERFLOW, 1);
+             lv[0] =
+               (u64) SFDP_SP_NODE_IP6_TABLE_OVERFLOW << 32 | SFDP_LV_TO_SP;
+             goto next_pkt;
+           }
+         else if (rv == 2)
+           {
+             vlib_node_increment_counter (vm, node->node_index,
+                                          SFDP_LOOKUP_ERROR_COLLISION, 1);
+             /* On collision the same packet is looked up again: none of the
+              * per-packet cursors has been advanced yet */
+             continue;
+           }
+         created_session_indices[n_created] =
+           sfdp_session_index_from_lookup (lv[0]);
+         n_created++;
+       }
+      else
+       {
+         /* Hit: the table value is stored right after the key in kv */
+         lv[0] ^= *(u64 *) (kv + key_size);
+         hit_count++;
+       }
+
+      b[0]->flow_id = sfdp_pseudo_flow_index_from_lookup (lv[0]);
+
+    next_pkt:
+      b[0]->flags |= VNET_BUFFER_F_L4_HDR_OFFSET_VALID;
+      vnet_buffer (b[0])->l4_hdr_offset = l4o[0];
+      len[0] = vlib_buffer_length_in_chain (vm, b[0]);
+
+      b += 1;
+      n_left -= 1;
+      key += key_size;
+      h += 1;
+      lv += 1;
+      len += 1;
+    }
+
+  // Notify created sessions
+  if (n_created)
+    {
+      sfdp_notify_new_sessions (sfdp, created_session_indices, n_created);
+    }
+
+  /* Pass 2: split the frame into slow-path / local / remote groups */
+  n_left = frame->n_vectors;
+  lv = lookup_vals;
+  b = bufs;
+  bi = from;
+  len = lengths;
+  while (n_left)
+    {
+      u16 flow_thread_index;
+      u32 flow_index;
+      session_version_t session_version;
+      vlib_combined_counter_main_t *vcm;
+
+      if (lv[0] & SFDP_LV_TO_SP)
+       {
+         to_sp[n_to_sp] = bi[0];
+         sp_indices[n_to_sp] = (lv[0] & ~(SFDP_LV_TO_SP)) >> 32;
+         to_sp_bufs[n_to_sp] = b[0];
+         n_to_sp++;
+         goto next_packet2;
+       }
+
+      flow_thread_index = sfdp_thread_index_from_lookup (lv[0]);
+      flow_index = sfdp_pseudo_flow_index_from_lookup (lv[0]);
+      session_index = flow_index >> 1;
+      vcm = &sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP];
+      session_version = sfdp_session_version_from_lookup (lv[0]);
+      vlib_increment_combined_counter (vcm, thread_index, flow_index, 1,
+                                      len[0]);
+      if (PREDICT_FALSE (flow_thread_index == SFDP_UNBOUND_THREAD_INDEX))
+       {
+         flow_thread_index = thread_index;
+         sfdp_session_bind_to_thread (session_index, &flow_thread_index, 1);
+         /* flow_thread_index now necessarily contains the actual thread index
+          * of the session */
+       }
+      if (flow_thread_index == thread_index)
+       {
+         /* known flow which belongs to this thread */
+         to_local[n_local] = bi[0];
+         local_flow_indices[n_local] = flow_index;
+         local_bufs[n_local] = b[0];
+         n_local++;
+       }
+      else
+       {
+         /* known flow which belongs to remote thread */
+         to_remote[n_remote] = bi[0];
+         thread_indices[n_remote] = flow_thread_index;
+         /* Store the current session version in buffer to check if it's still
+          * valid after handoff */
+         sfdp_buffer (b[0])->session_version_before_handoff = session_version;
+         n_remote++;
+       }
+    next_packet2:
+      n_left -= 1;
+      lv += 1;
+      b += 1;
+      bi += 1;
+      len += 1;
+    }
+
+  /* handover buffers to remote node */
+  if (n_remote)
+    {
+      u32 n_remote_enq;
+      n_remote_enq = vlib_buffer_enqueue_to_thread (
+       vm, node, fqi, to_remote, thread_indices, n_remote, 1);
+      vlib_node_increment_counter (vm, node->node_index,
+                                  SFDP_LOOKUP_ERROR_REMOTE, n_remote_enq);
+      vlib_node_increment_counter (vm, node->node_index,
+                                  SFDP_LOOKUP_ERROR_CON_DROP,
+                                  n_remote - n_remote_enq);
+    }
+
+  /* enqueue local */
+  if (n_local)
+    {
+      u16 *current_next = local_next_indices;
+      u32 *local_flow_index = local_flow_indices;
+      uword session_scope_index;
+      b = local_bufs;
+      n_left = n_local;
+
+      /* TODO: prefetch session and buffer + 4 loop */
+      while (n_left)
+       {
+         session_index = local_flow_index[0] >> 1;
+         session = sfdp_session_at_index (session_index);
+         session_scope_index = session->scope_index;
+         if (PREDICT_TRUE (session_scope_index == scope_index))
+           {
+             sfdp_bitmap_t pbmp =
+               session->bitmaps[sfdp_direction_from_flow_index (
+                 local_flow_index[0])];
+             sfdp_buffer (b[0])->service_bitmap = pbmp;
+
+             /* The tenant of the buffer is the tenant of the session */
+             sfdp_buffer (b[0])->tenant_index = session->tenant_idx;
+
+             sfdp_next (b[0], current_next);
+           }
+         else
+           /* Session belongs to another scope: send the buffer to the next
+            * index associated with that scope */
+           current_next[0] =
+             SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (session_scope_index);
+
+         local_flow_index += 1;
+         current_next += 1;
+         b += 1;
+         n_left -= 1;
+       }
+      vlib_buffer_enqueue_to_next (vm, node, to_local, local_next_indices,
+                                  n_local);
+      vlib_node_increment_counter (vm, node->node_index,
+                                  SFDP_LOOKUP_ERROR_LOCAL, n_local);
+    }
+
+  /* Slow-path buffers are put in frames addressed directly to the slow-path
+   * node configured on their tenant */
+  if (n_to_sp)
+    {
+      vlib_frame_t *f = NULL;
+      u32 *current_next_slot = NULL;
+      u32 current_left_to_next = 0;
+      u32 *current_to_sp = to_sp;
+      u32 *sp_index = sp_indices;
+      u32 *sp_node_index = sp_node_indices;
+      u32 last_node_index = VLIB_INVALID_NODE_INDEX;
+
+      b = to_sp_bufs;
+      n_left = n_to_sp;
+
+      while (n_left)
+       {
+         u32 node_index;
+         u16 tenant_idx;
+         sfdp_tenant_t *tenant;
+
+         tenant_idx = sfdp_buffer (b[0])->tenant_index;
+         tenant = sfdp_tenant_at_index (sfdp, tenant_idx);
+         node_index = tenant->sp_node_indices[sp_index[0]];
+         sp_node_index[0] = node_index;
+
+         /* Start a new frame when the target node changes or the current
+          * frame is full */
+         if (PREDICT_FALSE (node_index != last_node_index) ||
+             current_left_to_next == 0)
+           {
+             if (f != NULL)
+               vlib_put_frame_to_node (vm, last_node_index, f);
+             f = vlib_get_frame_to_node (vm, node_index);
+             f->frame_flags |= node->flags & VLIB_NODE_FLAG_TRACE;
+             current_next_slot = vlib_frame_vector_args (f);
+             current_left_to_next = VLIB_FRAME_SIZE;
+             last_node_index = node_index;
+           }
+
+         current_next_slot[0] = current_to_sp[0];
+
+         f->n_vectors += 1;
+         current_to_sp += 1;
+         b += 1;
+         sp_index += 1;
+         sp_node_index += 1;
+         current_next_slot += 1;
+
+         current_left_to_next -= 1;
+         n_left -= 1;
+       }
+      vlib_put_frame_to_node (vm, last_node_index, f);
+    }
+
+  if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+    {
+      int i;
+      b = bufs;
+      bi = from;
+      h = hashes;
+      u32 *in_local = to_local;
+      u32 *in_remote = to_remote;
+      u32 *in_sp = to_sp;
+      /* NOTE(review): the loop stops at the first buffer that is not traced,
+       * i.e. it assumes traced buffers form a prefix of the frame. */
+      for (i = 0; i < frame->n_vectors; i++)
+       {
+         if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+           {
+             sfdp_parser_lookup_trace_t *t =
+               vlib_add_trace (vm, node, b[0], sizeof (*t));
+             t->sw_if_index = vnet_buffer (b[0])->sw_if_index[VLIB_RX];
+             t->flow_id = b[0]->flow_id;
+             t->hash = h[0];
+             t->is_sp = 0;
+             t->parser_index = parser_data_index;
+             /* to_local / to_remote are only filled up to n_local /
+              * n_remote: never compare against a slot past that point, it
+              * holds uninitialised stack data */
+             if ((u32) (in_local - to_local) < n_local &&
+                 bi[0] == in_local[0])
+               {
+                 t->next_index = local_next_indices[(in_local++) - to_local];
+               }
+             else if ((u32) (in_remote - to_remote) < n_remote &&
+                      bi[0] == in_remote[0])
+               {
+                 t->next_index = ~0;
+                 in_remote++;
+               }
+             else
+               {
+                 t->is_sp = 1;
+                 t->sp_index = sp_indices[in_sp - to_sp];
+                 t->sp_node_index = sp_node_indices[in_sp - to_sp];
+                 in_sp++;
+               }
+
+             clib_memcpy (&t->key_data, i * key_size + keys, key_size);
+
+             bi++;
+             b++;
+             h++;
+           }
+         else
+           break;
+       }
+    }
+  return frame->n_vectors;
+}
+
+#ifndef CLIB_MARCH_VARIANT
+#define _SFDP_PARSER_DEFINE_NODE_AUX(x)                                       \
+  static void __sfdp_parser_definition_add_name__##x (void)                   \
+    __attribute__ ((__constructor__));                                        \
+  static void __sfdp_parser_definition_add_name__##x (void)                   \
+  {                                                                           \
+    x##_node.name = sfdp_parser_registration_##x.name;                        \
+    sfdp_parser_registration_mutable_##x.node_reg = &x##_node;                \
+  }
+
+#else
+#define _SFDP_PARSER_DEFINE_NODE_AUX(x)
+#endif
+#define SFDP_PARSER_DEFINE_NODE(x)                                            \
+  VLIB_REGISTER_NODE (x##_node) = {                                           \
+    .vector_size = sizeof (u32),                                              \
+    .format_trace = 0,                                                        \
+    .type = VLIB_NODE_TYPE_INTERNAL,                                          \
+    .flags = VLIB_NODE_FLAG_ALLOW_LAZY_NEXT_NODES,                            \
+    .runtime_data = 0,                                                        \
+    .runtime_data_bytes = sizeof (u8),                                        \
+    .n_errors = ARRAY_LEN (sfdp_lookup_error_strings),                        \
+    .error_strings = sfdp_lookup_error_strings,                               \
+  };                                                                          \
+                                                                              \
+  VLIB_NODE_FN (x##_node)                                                     \
+  (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame)        \
+  {                                                                           \
+    return sfdp_parser_lookup_inline (                                        \
+      vm, node, frame, &sfdp_parser_registration_##x,                         \
+      sfdp_parser_registration_mutable_##x.sfdp_parser_data_index);           \
+  }                                                                           \
+  _SFDP_PARSER_DEFINE_NODE_AUX (x)
+
+/* Define always_inline for clang newer than 17: plain `static inline` in
+ * debug builds (CLIB_DEBUG > 0), forced inlining otherwise.
+ * NOTE(review): presumably this restores a definition that was #undef'ed
+ * earlier in this header -- confirm against the top of the file. */
+#if defined(__clang__) && __clang_major__ > 17
+#if CLIB_DEBUG > 0
+#define always_inline static inline
+#else
+#define always_inline static inline __attribute__ ((__always_inline__))
+#endif
+#endif
+
+#endif
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/reass.c b/src/vnet/sfdp/lookup/reass.c
new file mode 100644 (file)
index 0000000..a1966a2
--- /dev/null
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2024 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vnet/ip/reass/ip4_full_reass.h>
+#include <vnet/ip/reass/ip6_full_reass.h>
+#include <vnet/ip/reass/ip4_sv_reass.h>
+#include <vnet/ip/reass/ip6_sv_reass.h>
+#include <vnet/sfdp/lookup/reass.h>
+#include <vnet/sfdp/sfdp.h>
+
+/* Global state: next indices obtained from the IP reassembly nodes */
+sfdp_reass_main_t sfdp_reass_main;
+
+/*
+ * Init function: register the SFDP IPv4/IPv6 lookup nodes as next nodes of
+ * the shallow-virtual and full reassembly custom-context nodes, and fetch the
+ * error next indices of the full reassembly nodes. Always returns 0.
+ */
+static clib_error_t *
+sfdp_reass_main_init (vlib_main_t *vm)
+{
+  sfdp_reass_main_t *rm = &sfdp_reass_main;
+
+  /* Shallow virtual reassembly */
+  rm->ip4_sv_reass_next_index = ip4_sv_reass_custom_context_register_next_node (
+    sfdp_lookup_ip4_node.index);
+  rm->ip6_sv_reass_next_index = ip6_sv_reass_custom_context_register_next_node (
+    sfdp_lookup_ip6_node.index);
+
+  /* Full reassembly */
+  rm->ip4_full_reass_next_index =
+    ip4_full_reass_custom_context_register_next_node (
+      sfdp_lookup_ip4_node.index);
+  rm->ip6_full_reass_next_index =
+    ip6_full_reass_custom_context_register_next_node (
+      sfdp_lookup_ip6_node.index);
+
+  /* Full reassembly, error path */
+  rm->ip4_full_reass_err_next_index = ip4_full_reass_get_error_next_index ();
+  rm->ip6_full_reass_err_next_index = ip6_full_reass_get_error_next_index ();
+
+  return 0;
+}
+
+/* Register `node_index` as next node of the IPv4 full reassembly
+ * custom-context node and remember the returned next index. */
+void
+sfdp_ip4_full_reass_custom_context_register_next_node (u16 node_index)
+{
+  sfdp_reass_main_t *rm = &sfdp_reass_main;
+
+  rm->ip4_full_reass_next_index =
+    ip4_full_reass_custom_context_register_next_node (node_index);
+}
+
+/* Register `node_index` as next node of the IPv6 full reassembly
+ * custom-context node and remember the returned next index. */
+void
+sfdp_ip6_full_reass_custom_context_register_next_node (u16 node_index)
+{
+  sfdp_reass_main_t *rm = &sfdp_reass_main;
+
+  rm->ip6_full_reass_next_index =
+    ip6_full_reass_custom_context_register_next_node (node_index);
+}
+
+/* Register `node_index` with the IPv4 full reassembly custom-context node and
+ * store the returned next index as the error next index.
+ * NOTE(review): this uses the same registration call as the non-error
+ * variant -- confirm that is the intended API for the error path. */
+void
+sfdp_ip4_full_reass_custom_context_register_next_err_node (u16 node_index)
+{
+  sfdp_reass_main_t *rm = &sfdp_reass_main;
+
+  rm->ip4_full_reass_err_next_index =
+    ip4_full_reass_custom_context_register_next_node (node_index);
+}
+
+/* Register `node_index` with the IPv6 full reassembly custom-context node and
+ * store the returned next index as the error next index.
+ * NOTE(review): this uses the same registration call as the non-error
+ * variant -- confirm that is the intended API for the error path. */
+void
+sfdp_ip6_full_reass_custom_context_register_next_err_node (u16 node_index)
+{
+  sfdp_reass_main_t *rm = &sfdp_reass_main;
+
+  rm->ip6_full_reass_err_next_index =
+    ip6_full_reass_custom_context_register_next_node (node_index);
+}
+
+/* Register sfdp_reass_main_init as a vlib init function */
+VLIB_INIT_FUNCTION (sfdp_reass_main_init);
diff --git a/src/vnet/sfdp/lookup/reass.h b/src/vnet/sfdp/lookup/reass.h
new file mode 100644 (file)
index 0000000..5e188db
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_reass_h__
+#define __included_lookup_reass_h__
+
+#include <vlib/vlib.h>
+/* Next indices obtained from the IP reassembly nodes (filled in by
+ * sfdp_reass_main_init and the sfdp_ip*_full_reass_custom_context_register_*
+ * helpers in reass.c). */
+typedef struct
+{
+  /* Shallow Virtual Reassembly */
+  u16 ip4_sv_reass_next_index;
+  u16 ip6_sv_reass_next_index;
+
+  /* Full Reassembly */
+  u16 ip4_full_reass_next_index;
+  u16 ip6_full_reass_next_index;
+
+  /* Full Reassembly error next index */
+  u16 ip4_full_reass_err_next_index;
+  u16 ip6_full_reass_err_next_index;
+} sfdp_reass_main_t;
+/* Single global instance, defined in reass.c */
+extern sfdp_reass_main_t sfdp_reass_main;
+#endif
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/sfdp_bihashes.h b/src/vnet/sfdp/lookup/sfdp_bihashes.h
new file mode 100644 (file)
index 0000000..992d1b3
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_lookup_sfdp_bihashes_h__
+#define __included_lookup_sfdp_bihashes_h__
+#include <vnet/sfdp/lookup/parser.h>
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_32_8.h>
+#include <vppinfra/bihash_40_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/bihash_56_8.h>
+/* X-macro list of the (key size, value size) bihash flavours, in bytes, that
+ * parsers can use. Each entry needs its vppinfra/bihash_<k>_<v>.h include. */
+#define foreach_clib_bihash_kv                                                \
+  _ (24, 8)                                                                   \
+  _ (32, 8)                                                                   \
+  _ (40, 8)                                                                   \
+  _ (48, 8)                                                                   \
+  _ (56, 8)
+
+/* Table of bihash entry points indexed by key size in bytes: slot [k] holds
+ * the table size and the functions of clib_bihash_<k>_8. Slots whose key size
+ * is not listed in foreach_clib_bihash_kv are left zero-initialised.
+ * NOTE(review): the largest designated index is 56, so
+ * SFDP_PARSER_MAX_KEY_SIZE must be greater than 56 -- confirm in parser.h. */
+__attribute__ ((__unused__)) static const sfdp_parser_bihash_registration_t
+  sfdp_parser_bihash_regs[SFDP_PARSER_MAX_KEY_SIZE] = {
+#define _(k, v)                                                               \
+  [k] = {                                                                     \
+    .table_size = sizeof (clib_bihash_##k##_##v##_t),                         \
+    .sfdp_parser_bihash_add_del_fn = (void *) clib_bihash_add_del_##k##_##v,  \
+    .sfdp_parser_bihash_hash_fn = (void *) clib_bihash_hash_##k##_##v,        \
+    .sfdp_parser_bihash_init_fn = (void *) clib_bihash_init_##k##_##v,        \
+    .sfdp_parser_bihash_prefetch_bucket_fn =                                  \
+      (void *) clib_bihash_prefetch_bucket_##k##_##v,                         \
+    .sfdp_parser_bihash_search_with_hash_fn =                                 \
+      (void *) clib_bihash_search_inline_with_hash_##k##_##v,                 \
+  },
+
+    foreach_clib_bihash_kv
+#undef _
+  };
+
+/* Call bihash entry point `fn` of the flavour matching the key size of parser
+ * registration `x`, forwarding the remaining arguments unchanged. */
+#define SFDP_PARSER_BIHASH_CALL_FN(x, fn, ...)                                \
+  sfdp_parser_bihash_regs[(x)->key_size].fn (__VA_ARGS__)
+#endif
\ No newline at end of file
diff --git a/src/vnet/sfdp/lookup/sv_reass_node.c b/src/vnet/sfdp/lookup/sv_reass_node.c
new file mode 100644 (file)
index 0000000..e8d6ce8
--- /dev/null
@@ -0,0 +1,148 @@
+/*
+ * Copyright (c) 2022 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/reass.h>
+
+/* Trace record of the sv-reass slow-path nodes. It carries no data: the trace
+ * formatter only prints the node name. */
+typedef struct
+{
+} sfdp_lookup_sp_sv_reass_trace_t;
+
+/*
+ * Trace formatter. Consumes the (vm, node, trace) triple from `args` and
+ * appends "<node name>: sent to svr node" to `s`. Only the node is used.
+ */
+static u8 *
+format_sfdp_lookup_sp_sv_reass_trace (u8 *s, va_list *args)
+{
+  vlib_node_t *node;
+
+  /* The arguments must be pulled in order even when they are not used */
+  (void) va_arg (*args, vlib_main_t *);
+  node = va_arg (*args, vlib_node_t *);
+  (void) va_arg (*args, sfdp_lookup_sp_sv_reass_trace_t *);
+
+  return format (s, "%v: sent to svr node", node->name);
+}
+
+/* X-macro list of the next nodes: (enum suffix, graph node name) */
+#define foreach_sfdp_lookup_sp_sv_reass_next                                  \
+  _ (IP4_SVR, "ip4-sv-reassembly-custom-context")                             \
+  _ (IP6_SVR, "ip6-sv-reassembly-custom-context")
+
+/* Next indices, one per entry of the list above, followed by their count */
+enum
+{
+#define _(sym, str) SFDP_LOOKUP_SP_SV_REASS_NEXT_##sym,
+  foreach_sfdp_lookup_sp_sv_reass_next
+#undef _
+    SFDP_LOOKUP_SP_SV_REASS_N_NEXT
+};
+
+/* X-macro list of the node errors: (enum suffix, description) */
+#define foreach_sfdp_lookup_sp_sv_reass_error _ (NOERROR, "No error")
+
+/* Error codes, one per entry of the list above, followed by their count.
+ * The list has to be expanded here, otherwise no error enumerator exists and
+ * the count is 0 while the strings table has one entry. */
+typedef enum
+{
+#define _(sym, str) SFDP_LOOKUP_SP_SV_REASS_ERROR_##sym,
+  foreach_sfdp_lookup_sp_sv_reass_error
+#undef _
+    SFDP_LOOKUP_SP_SV_REASS_N_ERROR
+} sfdp_lookup_sp_sv_reass_error_t;
+
+/* Error descriptions, generated from foreach_sfdp_lookup_sp_sv_reass_error;
+ * used as .error_strings by both node registrations below. */
+static char *sfdp_lookup_sp_sv_reass_error_strings[] = {
+#define _(sym, str) str,
+  foreach_sfdp_lookup_sp_sv_reass_error
+#undef _
+};
+
+/*
+ * Node body shared by the IPv4 and IPv6 sv-reass slow-path nodes.
+ *
+ * For every buffer of the frame: record its flow_id as aux data, add an
+ * (empty) trace record if the buffer is traced, save the tenant index in the
+ * secondary sfdp buffer metadata, flag the buffer as SV reassembled and set
+ * the next index the reassembly node must use afterwards. The whole frame is
+ * then enqueued, with aux data, to the IPv4 or IPv6 SVR next node.
+ *
+ * is_ip6 - selects the IPv6 (non-zero) or IPv4 (zero) next indices.
+ * Returns the number of buffers in the frame.
+ */
+static_always_inline u32
+sfdp_lookup_sp_sv_reass_inline (vlib_main_t *vm, vlib_node_runtime_t *node,
+                               vlib_frame_t *frame, bool is_ip6)
+{
+  sfdp_reass_main_t *vrm = &sfdp_reass_main;
+  vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b;
+  u32 aux_data[VLIB_FRAME_SIZE], *a;
+  u32 *from = vlib_frame_vector_args (frame);
+  u32 n_left = frame->n_vectors;
+
+  vlib_get_buffers (vm, from, bufs, n_left);
+  b = bufs;
+  a = aux_data;
+  // TODO: prefetch + 4-loop
+  while (n_left)
+    {
+      a[0] = b[0]->flow_id;
+      /* The trace record has no field to fill in: the returned pointer is
+       * not needed */
+      if (b[0]->flags & VLIB_BUFFER_IS_TRACED)
+       vlib_add_trace (vm, node, b[0],
+                       sizeof (sfdp_lookup_sp_sv_reass_trace_t));
+
+      /* Save the tenant index */
+      sfdp_buffer2 (b[0])->tenant_index = sfdp_buffer (b[0])->tenant_index;
+      sfdp_buffer2 (b[0])->flags = SFDP_BUFFER_FLAG_SV_REASSEMBLED;
+
+      vnet_buffer (b[0])->ip.reass.next_index =
+       is_ip6 ? vrm->ip6_sv_reass_next_index : vrm->ip4_sv_reass_next_index;
+      b += 1;
+      a += 1;
+      n_left -= 1;
+    }
+  vlib_buffer_enqueue_to_single_next_with_aux (
+    vm, node, from, aux_data,
+    is_ip6 ? SFDP_LOOKUP_SP_SV_REASS_NEXT_IP6_SVR :
+            SFDP_LOOKUP_SP_SV_REASS_NEXT_IP4_SVR,
+    frame->n_vectors);
+  return frame->n_vectors;
+}
+
+/* IPv4 flavour of the sv-reass slow-path node */
+VLIB_NODE_FN (sfdp_lookup_ip4_sp_sv_reass)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const bool is_ip6 = 0;
+
+  return sfdp_lookup_sp_sv_reass_inline (vm, node, frame, is_ip6);
+}
+
+/* Graph node registration of the IPv4 sv-reass slow-path node. The next
+ * nodes are generated from foreach_sfdp_lookup_sp_sv_reass_next. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip4_sp_sv_reass) = {
+  .name = "sfdp-lookup-ip4-sp-sv-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_sp_sv_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN (sfdp_lookup_sp_sv_reass_error_strings),
+  .error_strings = sfdp_lookup_sp_sv_reass_error_strings,
+  .next_nodes = {
+#define _(sym, str) [SFDP_LOOKUP_SP_SV_REASS_NEXT_##sym] = str,
+  foreach_sfdp_lookup_sp_sv_reass_next
+#undef _
+  },
+  .n_next_nodes = SFDP_LOOKUP_SP_SV_REASS_N_NEXT,
+};
+
+/* IPv6 flavour of the sv-reass slow-path node */
+VLIB_NODE_FN (sfdp_lookup_ip6_sp_sv_reass)
+(vlib_main_t *vm, vlib_node_runtime_t *node, vlib_frame_t *frame)
+{
+  const bool is_ip6 = 1;
+
+  return sfdp_lookup_sp_sv_reass_inline (vm, node, frame, is_ip6);
+}
+
+/* Graph node registration of the IPv6 sv-reass slow-path node. The next
+ * nodes are generated from foreach_sfdp_lookup_sp_sv_reass_next. */
+VLIB_REGISTER_NODE (sfdp_lookup_ip6_sp_sv_reass) = {
+  .name = "sfdp-lookup-ip6-sp-sv-reass",
+  .vector_size = sizeof (u32),
+  .format_trace = format_sfdp_lookup_sp_sv_reass_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+
+  .n_errors = ARRAY_LEN (sfdp_lookup_sp_sv_reass_error_strings),
+  .error_strings = sfdp_lookup_sp_sv_reass_error_strings,
+  .next_nodes = {
+#define _(sym, str) [SFDP_LOOKUP_SP_SV_REASS_NEXT_##sym] = str,
+  foreach_sfdp_lookup_sp_sv_reass_next
+#undef _
+  },
+  .n_next_nodes = SFDP_LOOKUP_SP_SV_REASS_N_NEXT,
+};
diff --git a/src/vnet/sfdp/service.c b/src/vnet/sfdp/service.c
new file mode 100644 (file)
index 0000000..d654715
--- /dev/null
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vlib/vlib.h>
+#include <vppinfra/ptclosure.h>
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/service.h>
+#include <vnet/sfdp/lookup/parser.h>
+
+/*
+ * Build a total order over `services` that satisfies every runs_before /
+ * runs_after constraint, give each service its index and mask in the service
+ * bitmap, and call sfdp_service_next_indices_init on every non-terminal
+ * service whose graph node exists.
+ *
+ * vm                    - vlib main, used to look nodes up by name
+ * services              - vector of service registrations to order
+ * index_reg_by_name     - hash: service name -> index used for the rows and
+ *                         columns of the constraint matrix
+ * service_index_by_name - hash updated with node_name -> final index
+ *
+ * Returns a new vector holding the registrations in their final order (a
+ * NULL vector when `services` is empty). Panics when no total order can be
+ * built.
+ */
+static sfdp_service_registration_t **
+sfdp_service_init_for_scope (vlib_main_t *vm,
+                            sfdp_service_registration_t **services,
+                            uword *index_reg_by_name,
+                            uword **service_index_by_name)
+{
+  sfdp_service_registration_t *current_reg;
+  sfdp_service_registration_t **res_services = 0;
+  u8 **runs_after_table = 0;
+  u8 **closure = 0;
+  uword *ordered_indices = 0;
+  uword current_index = vec_len (services);
+
+  /* Build the constraints matrix */
+  if (current_index == 0)
+    return res_services;
+  current_reg = services[0];
+  runs_after_table = clib_ptclosure_alloc (current_index);
+
+  /* runs_after_table[a][b] == 1 means "a runs after b".
+   * NOTE(review): rows/columns are addressed with the values stored in
+   * index_reg_by_name while the matrix is sized vec_len (services); this
+   * assumes those values are always < vec_len (services) -- confirm with the
+   * caller. */
+  while (current_index > 0)
+    {
+      char **current_target;
+      current_index--;
+      current_reg = vec_elt_at_index (services, current_index)[0];
+
+      /* Process runs_before and runs_after constraints */
+      current_target = current_reg->runs_before;
+      while (current_target[0])
+       {
+         uword *res = hash_get_mem (index_reg_by_name, current_target[0]);
+         if (res)
+           runs_after_table[res[0]][current_index] = 1;
+         current_target++;
+       }
+      current_target = current_reg->runs_after;
+      while (current_target[0])
+       {
+         uword *res = hash_get_mem (index_reg_by_name, current_target[0]);
+         if (res)
+           runs_after_table[current_index][res[0]] = 1;
+         current_target++;
+       }
+    }
+  /*hash_free (index_reg_by_name);*/
+  closure = clib_ptclosure (runs_after_table);
+  /* Repeatedly pick the first service that has no pending predecessor, then
+   * restart the scan from the beginning */
+again:
+  for (int i = 0; i < vec_len (services); i++)
+    {
+      for (int j = 0; j < vec_len (services); j++)
+       {
+         if (closure[i][j])
+           {
+             /* i runs after j so it can't be output */
+             goto skip_i;
+           }
+       }
+      /* i doesn't run after any pending element so it can be output */
+      vec_add1 (ordered_indices, i);
+      /* Nobody has to wait for i any more ... */
+      for (int j = 0; j < vec_len (services); j++)
+       closure[j][i] = 0;
+      /* ... and the diagonal marks i as already output so that later scans
+       * skip it */
+      closure[i][i] = 1;
+      goto again;
+    skip_i:;
+    }
+  /* Services that could never be output are part of a constraint cycle */
+  if (vec_len (services) != vec_len (ordered_indices))
+    clib_panic ("Failed to build total order for sfdp services");
+  clib_ptclosure_free (runs_after_table);
+  clib_ptclosure_free (closure);
+
+  /* NOTE(review): `1ULL << i` below assumes fewer than 64 services per
+   * scope -- confirm against the width of the service bitmap. */
+  vec_resize (res_services, vec_len (services));
+  for (uword i = 0; i < vec_len (ordered_indices); i++)
+    {
+      current_reg = vec_elt_at_index (services, ordered_indices[i])[0];
+      *current_reg->index_in_bitmap = i;
+      *current_reg->service_mask = 1ULL << i;
+      res_services[i] = current_reg;
+      hash_set_mem (*service_index_by_name, current_reg->node_name, i);
+    }
+  /*sm->service_index_by_name = service_index_by_name;*/
+  /*vec_free (services);*/
+  vec_free (ordered_indices);
+
+  /* Build the graph */
+  services = res_services;
+  for (uword i = 0; i < vec_len (services); i++)
+    {
+      sfdp_service_registration_t *reg_i = vec_elt_at_index (services, i)[0];
+      vlib_node_t *node_i =
+       vlib_get_node_by_name (vm, (u8 *) reg_i->node_name);
+      /* Skip services without a graph node and terminal services */
+      if (node_i == 0)
+       continue;
+      if (reg_i->is_terminal)
+       continue;
+      sfdp_service_next_indices_init (vm, node_i->index, services);
+    }
+  return res_services;
+}
+
+/*
+ * Make sure the parser lookup node described by `original_reg` exists for
+ * scope `scope_index`, record its node index in
+ * sfdp->parser_node_index_per_scope_per_original and initialise its service
+ * next indices.
+ *
+ * Scope 0 reuses the originally registered node. Any other scope registers a
+ * clone named "<original name>-<scope_name>" that shares the original's node
+ * functions, errors, trace formatter and flags, and carries the scope index
+ * in its runtime data.
+ */
+static void
+sfdp_service_init_parser_node_for_scope (
+  vlib_main_t *vm, vlib_node_registration_t *original_reg,
+  sfdp_service_registration_t **services, u32 scope_index,
+  const char *scope_name)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_lookup_node_runtime_data_t rt = { .scope_index = scope_index };
+  const uword original_node_index = original_reg->index;
+  uword node_index = original_node_index;
+  uword *per_scope;
+
+  /* Fetch, growing it if needed, the per-scope vector of this original */
+  vec_validate (sfdp->parser_node_index_per_scope_per_original,
+               original_node_index);
+  per_scope = *vec_elt_at_index (
+    sfdp->parser_node_index_per_scope_per_original, original_node_index);
+  vec_validate (per_scope, scope_index);
+
+  if (scope_index != 0)
+    {
+      vlib_node_registration_t clone;
+
+      clib_memset (&clone, 0, sizeof (clone));
+      clone.type = VLIB_NODE_TYPE_INTERNAL;
+      clone.vector_size = sizeof (u32);
+      clone.flags = original_reg->flags;
+      clone.format_trace = original_reg->format_trace;
+      clone.node_fn_registrations = original_reg->node_fn_registrations;
+      clone.n_errors = original_reg->n_errors;
+      clone.error_strings = original_reg->error_strings;
+      clone.error_counters = original_reg->error_counters;
+      clone.runtime_data = &rt;
+      clone.runtime_data_bytes = sizeof (rt);
+      node_index = vlib_register_node (vm, &clone, "%s-%s",
+                                      original_reg->name, scope_name);
+    }
+
+  /* vec_validate may have moved the vector: store the pointer back */
+  per_scope[scope_index] = node_index;
+  sfdp->parser_node_index_per_scope_per_original[original_node_index] =
+    per_scope;
+  sfdp_service_next_indices_init (vm, node_index, services);
+}
+
+/*
+ * Set up the lookup-ip4, lookup-ip6 and handoff nodes, the handoff frame
+ * queue and every parser node for scope `scope_index`.
+ *
+ * Scope 0 uses the statically registered nodes. Any other scope registers a
+ * clone of each node, named "<node>-<scope name>", with the scope index in
+ * its runtime data. The resulting node indices are stored in the
+ * sfdp->*_node_index_per_scope vectors.
+ */
+static void
+sfdp_service_init_nodes_for_scope (vlib_main_t *vm, u32 scope_index)
+{
+  sfdp_service_main_t *sm = &sfdp_service_main;
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+
+  const char *scope_name = vec_elt_at_index (sm->scope_names, scope_index)[0];
+  vlib_node_registration_t r;
+  sfdp_service_registration_t **services =
+    vec_elt_at_index (sm->services_per_scope_index, scope_index)[0];
+  uword node_index;
+  sfdp_lookup_node_runtime_data_t rt = { .scope_index = scope_index };
+  sfdp_parser_registration_mutable_t *preg = pm->regs;
+
+/* _(n, s, x): n = static node registration, s = base node name,
+ * x = prefix of the sfdp-><x>_node_index_per_scope vector.
+ * Leaves the index of the node it handled in node_index. */
+#define _(n, s, x)                                                            \
+  if (scope_index != 0)                                                       \
+    {                                                                         \
+      clib_memset (&r, 0, sizeof (r));                                        \
+      r.vector_size = sizeof (u32);                                           \
+      r.format_trace = (n).format_trace;                                      \
+      r.type = VLIB_NODE_TYPE_INTERNAL;                                       \
+      r.runtime_data = &rt;                                                   \
+      r.runtime_data_bytes = sizeof (rt);                                     \
+      r.n_errors = (n).n_errors;                                              \
+      r.error_strings = (n).error_strings;                                    \
+      r.error_counters = (n).error_counters;                                  \
+      r.node_fn_registrations = (n).node_fn_registrations;                    \
+      r.flags = (n).flags;                                                    \
+      node_index = vlib_register_node (vm, &r, s "-%s", scope_name);          \
+      vec_validate (sfdp->x##_node_index_per_scope, scope_index);             \
+      sfdp->x##_node_index_per_scope[scope_index] = node_index;               \
+    }                                                                         \
+  else                                                                        \
+    {                                                                         \
+      node_index = (n).index;                                                 \
+      vec_validate (sfdp->x##_node_index_per_scope, scope_index);             \
+      sfdp->x##_node_index_per_scope[scope_index] = node_index;               \
+    }                                                                         \
+                                                                              \
+  sfdp_service_next_indices_init (vm, node_index, services);
+
+  /* Order matters: the handoff node must come last because node_index is
+   * reused right below to create the frame queue */
+  _ (sfdp_lookup_ip4_node, "sfdp-lookup-ip4", ip4_lookup)
+  _ (sfdp_lookup_ip6_node, "sfdp-lookup-ip6", ip6_lookup)
+  _ (sfdp_handoff_node, "sfdp-handoff", handoff)
+#undef _
+  vec_validate (sfdp->frame_queue_index_per_scope, scope_index);
+
+  /* Last node index is handoff node */
+  sfdp->frame_queue_index_per_scope[scope_index] =
+    vlib_frame_queue_main_init (node_index, 0);
+
+  /* Same work for all parser nodes */
+  while (preg)
+    {
+      /* Registrations without a node are skipped */
+      if (preg->node_reg)
+       sfdp_service_init_parser_node_for_scope (vm, preg->node_reg, services,
+                                                scope_index, scope_name);
+      preg = preg->next;
+    }
+}
+
+/*
+ * Init function of the SFDP service framework.
+ *
+ * Walks the linked list of service registrations, groups them by scope (a
+ * registration without a scope goes to scope 0, named "default"), lets
+ * sfdp_service_init_for_scope () process each group, creates the per-scope
+ * lookup/handoff/parser nodes and finally connects the nodes of every scope
+ * to the handoff node of every other scope.
+ *
+ * Panics if the same node name is registered twice. Always returns 0.
+ */
+static clib_error_t *
+sfdp_service_init (vlib_main_t *vm)
+{
+  sfdp_service_main_t *sm = &sfdp_service_main;
+  sfdp_service_registration_t ***services_per_scope_index = 0;
+  sfdp_service_registration_t *current_reg;
+  uword *index_reg_by_name = hash_create_string (0, sizeof (uword));
+  uword *service_index_by_name = hash_create_string (0, sizeof (uword));
+  uword *scope_index_by_name = hash_create_string (0, sizeof (uword));
+  u32 n_scopes = 1;
+  const char **scope_names = 0;
+
+  vec_validate (services_per_scope_index, 0);
+  vec_validate (scope_names, 0);
+  scope_names[0] = "default";
+
+  current_reg = sm->next_service;
+
+  vlib_call_init_function (vm, sfdp_parser_init);
+  /* Parse the registrations linked list */
+  while (current_reg)
+    {
+      sfdp_service_registration_t **services;
+      uword *si;
+      u32 scope_index;
+      const char *name = current_reg->node_name;
+      const char *scope = current_reg->scope;
+      uword *res = hash_get_mem (index_reg_by_name, name);
+      uword current_index;
+
+      if (res)
+        clib_panic ("Trying to register %s twice!", name);
+
+      /* Scope already exists ? */
+      if (scope == 0)
+        scope_index = 0;
+      else if ((si = hash_get_mem (scope_index_by_name, scope)) == 0)
+        {
+          /* Create scope */
+          scope_index = n_scopes;
+          n_scopes += 1;
+          hash_set_mem (scope_index_by_name, scope, scope_index);
+          vec_validate (scope_names, scope_index);
+          scope_names[scope_index] = scope;
+        }
+      else
+        scope_index = *si;
+
+      vec_validate (services_per_scope_index, scope_index);
+
+      /* vec_add1 may reallocate, so store the vector back afterwards */
+      services = *vec_elt_at_index (services_per_scope_index, scope_index);
+      current_index = vec_len (services);
+      vec_add1 (services, current_reg);
+      services_per_scope_index[scope_index] = services;
+      hash_set_mem (index_reg_by_name, name, current_index);
+      current_reg = current_reg->next;
+    }
+
+  /* Initialise each scope */
+  for (u32 i = 0; i < n_scopes; i++)
+    {
+      sfdp_service_registration_t **res_services;
+      res_services = sfdp_service_init_for_scope (
+        vm, services_per_scope_index[i], index_reg_by_name,
+        &service_index_by_name);
+      /* The per-scope input vector is replaced by the processed one */
+      vec_free (services_per_scope_index[i]);
+      services_per_scope_index[i] = res_services;
+    }
+  sm->scope_names = scope_names;
+  sm->scope_index_by_name = scope_index_by_name;
+  sm->n_scopes = n_scopes;
+  sm->service_index_by_name = service_index_by_name;
+  sm->services_per_scope_index = services_per_scope_index;
+  hash_free (index_reg_by_name);
+
+  /* Create the lookup nodes for each scope */
+  for (u32 i = 0; i < n_scopes; i++)
+    sfdp_service_init_nodes_for_scope (vm, i);
+
+  /* Connect lookup nodes to handoff nodes of other scopes */
+  for (u32 i = 0; i < n_scopes; i++)
+    for (u32 j = 0; j < n_scopes; j++)
+      {
+        uword from_ni_v4, from_ni_v6, from_ni_hoff, from_ni_parser, to_ni;
+        uword **parser_node_index_per_scope;
+        from_ni_v4 = sfdp_main.ip4_lookup_node_index_per_scope[i];
+        from_ni_v6 = sfdp_main.ip6_lookup_node_index_per_scope[i];
+        from_ni_hoff = sfdp_main.handoff_node_index_per_scope[i];
+        to_ni = sfdp_main.handoff_node_index_per_scope[j];
+
+        if (i == j)
+          continue;
+        vlib_node_add_next_with_slot (vm, from_ni_v4, to_ni,
+                                      SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j));
+        vlib_node_add_next_with_slot (vm, from_ni_v6, to_ni,
+                                      SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j));
+        vlib_node_add_next_with_slot (vm, from_ni_hoff, to_ni,
+                                      SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j));
+
+        /* Connect each parser_node for scope i to handoff of scope j.
+         * The iterator points INTO the outer vector, so the length to test
+         * is the one of the inner per-scope vector it points at; entries
+         * of nodes that are not parser nodes are empty and get skipped. */
+        vec_foreach (parser_node_index_per_scope,
+                     sfdp_main.parser_node_index_per_scope_per_original)
+          if (vec_len (parser_node_index_per_scope[0]) > i)
+            {
+              from_ni_parser = parser_node_index_per_scope[0][i];
+              vlib_node_add_next_with_slot (
+                vm, from_ni_parser, to_ni,
+                SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE (j));
+            }
+      }
+
+  vlib_node_main_lazy_next_update (vm);
+  return 0;
+}
+
+/*
+ * Make every service of the given vector reachable from node_index: the
+ * node named by each registration becomes a next node, using the value
+ * behind index_in_bitmap as slot. Services whose node cannot be found by
+ * name are silently skipped.
+ */
+void
+sfdp_service_next_indices_init (vlib_main_t *vm, uword node_index,
+                               sfdp_service_registration_t **services)
+{
+  sfdp_service_registration_t **regp;
+
+  vec_foreach (regp, services)
+    {
+      vlib_node_t *svc_node =
+        vlib_get_node_by_name (vm, (u8 *) regp[0]->node_name);
+
+      if (svc_node == 0)
+        continue;
+
+      vlib_node_add_next_with_slot (vm, node_index, svc_node->index,
+                                    regp[0]->index_in_bitmap[0]);
+    }
+}
+
+/* Run sfdp_service_init () as a vlib init function */
+VLIB_INIT_FUNCTION (sfdp_service_init);
+/* Global service state; its next_service list is filled by the
+ * constructors generated by SFDP_SERVICE_DEFINE */
+sfdp_service_main_t sfdp_service_main;
\ No newline at end of file
diff --git a/src/vnet/sfdp/service.h b/src/vnet/sfdp/service.h
new file mode 100644 (file)
index 0000000..a203573
--- /dev/null
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_service_h__
+#define __included_sfdp_service_h__
+#include <vlib/vlib.h>
+#include <vnet/sfdp/common.h>
+
+/* One SFDP service registration, declared with SFDP_SERVICE_DEFINE and
+ * linked into sfdp_service_main.next_service by a constructor. */
+typedef struct _sfdp_service_registration_t
+{
+  /* Next registration in the singly linked list */
+  struct _sfdp_service_registration_t *next;
+  /* Name of the graph node reached when this service is selected */
+  const char *node_name;
+  /* Scope name; NULL selects scope 0 ("default") */
+  const char *scope;
+  /* Ordering constraints - presumably 0-terminated lists of service names
+   * as produced by SFDP_SERVICES (); TODO confirm against the sorter */
+  char **runs_before;
+  char **runs_after;
+  /* Points at sfdp_service_index_in_bitmap_<x>; the pointed-to value is
+   * used as next-node slot towards the service node */
+  u8 *index_in_bitmap;
+  /* Points at sfdp_service_mask_<x> */
+  sfdp_bitmap_t *service_mask;
+  u8 is_terminal;
+} sfdp_service_registration_t;
+
+/* Global state of the service framework */
+typedef struct
+{
+  /* Head of the linked list of registrations */
+  sfdp_service_registration_t *next_service;
+  /* Vector indexed by scope index of vectors of registrations */
+  sfdp_service_registration_t ***services_per_scope_index;
+  /* String hash: scope name -> scope index (the default scope is not
+   * entered in this hash) */
+  uword *scope_index_by_name;
+  /* Vector indexed by scope index; entry 0 is "default" */
+  const char **scope_names;
+  uword n_scopes;
+  /* String hash: service name -> service index */
+  uword *service_index_by_name;
+} sfdp_service_main_t;
+
+extern sfdp_service_main_t sfdp_service_main;
+
+/* Declare the bitmap index and mask variables of service x so that another
+ * translation unit can refer to them through SFDP_SERVICE_INDEX (x) and
+ * SFDP_SERVICE_MASK (x). */
+#define SFDP_SERVICE_DECLARE(x)                                               \
+  extern u8 sfdp_service_index_in_bitmap_##x;                                 \
+  extern sfdp_bitmap_t sfdp_service_mask_##x;
+
+/* Names of the per-service mask / index variables */
+#define SFDP_SERVICE_MASK(x)  sfdp_service_mask_##x
+#define SFDP_SERVICE_INDEX(x) sfdp_service_index_in_bitmap_##x
+
+/*
+ * SFDP_SERVICE_DEFINE (x) = { ...initializer... };
+ *
+ * In the base build this defines the index and mask variables of service x
+ * plus a constructor that pushes the registration on the
+ * sfdp_service_main.next_service list and points it at those variables.
+ * The macro ends with the declarator of the registration, so the caller
+ * supplies the initializer.
+ *
+ * In march-variant builds only the extern declarations are emitted and the
+ * caller's initializer lands in an unused dummy object.
+ */
+#ifndef CLIB_MARCH_VARIANT
+#define SFDP_SERVICE_DEFINE(x)                                                \
+  static sfdp_service_registration_t sfdp_service_registration_##x;           \
+  static void __sfdp_service_add_registration_##x (void)                      \
+    __attribute__ ((__constructor__));                                        \
+  u8 sfdp_service_index_in_bitmap_##x;                                        \
+  sfdp_bitmap_t sfdp_service_mask_##x;                                        \
+  static void __sfdp_service_add_registration_##x (void)                      \
+  {                                                                           \
+    sfdp_service_main_t *sm = &sfdp_service_main;                             \
+    sfdp_service_registration_t *r = &sfdp_service_registration_##x;          \
+    r->next = sm->next_service;                                               \
+    sm->next_service = r;                                                     \
+    r->index_in_bitmap = &sfdp_service_index_in_bitmap_##x;                   \
+    r->service_mask = &sfdp_service_mask_##x;                                 \
+  }                                                                           \
+  static sfdp_service_registration_t sfdp_service_registration_##x
+#else
+#define SFDP_SERVICE_DEFINE(x)                                                \
+  SFDP_SERVICE_DECLARE (x);                                                   \
+  static sfdp_service_registration_t __clib_unused                            \
+    unused_sfdp_service_registration_##x
+
+#endif
+
+/* Build an anonymous, 0-terminated array of strings from the arguments */
+#define SFDP_SERVICES(...)                                                    \
+  (char *[])                                                                  \
+  {                                                                           \
+    __VA_ARGS__, 0                                                            \
+  }
+
+/*
+ * Select the next service for buffer b: the lowest set bit of the buffer's
+ * service bitmap gives the next index, and that bit is flipped (cleared) so
+ * the following call moves on to the next service. The bitmap must not be
+ * empty (asserted in debug images).
+ */
+static_always_inline void
+sfdp_next (vlib_buffer_t *b, u16 *next_index)
+{
+  sfdp_bitmap_t pending = sfdp_buffer (b)->service_bitmap;
+  /* 1-based position of the lowest set bit, 0 when no bit is set */
+  u8 lowest = __builtin_ffsll (pending);
+
+  ASSERT (lowest != 0);
+  sfdp_buffer (b)->service_bitmap = pending ^ (1ULL << (lowest - 1));
+  *next_index = (lowest - 1);
+}
+
+/* Loop header iterating s_var over [0, sfdp_service_main.n_scopes) */
+#define foreach_sfdp_scope_index(s_var)                                       \
+  for (s_var = 0; s_var < sfdp_service_main.n_scopes; s_var++)
+/* Add the node of each service in the vector as next node of node_index,
+ * at the slot given by the service's bitmap index */
+void sfdp_service_next_indices_init (vlib_main_t *vm, uword node_index,
+                                    sfdp_service_registration_t **services);
+
+/*
+ * Return the index stored for service `name` in
+ * sfdp_service_main.service_index_by_name. Panics when the name is unknown.
+ */
+static_always_inline u8
+sfdp_get_service_index_by_name (const char *name)
+{
+  uword *entry = hash_get_mem (sfdp_service_main.service_index_by_name, name);
+
+  if (!entry)
+    clib_panic ("Unknown service name '%s'", name);
+
+  return entry[0];
+}
+
+#endif //__included_sfdp_service_h__
\ No newline at end of file
diff --git a/src/vnet/sfdp/sfdp.api b/src/vnet/sfdp/sfdp.api
new file mode 100644 (file)
index 0000000..1e4bf29
--- /dev/null
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+option version = "0.0.1";
+import "vnet/ip/ip_types.api";
+import "vnet/sfdp/sfdp_types.api";
+
+/** \brief Add or delete an SFDP tenant
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tenant_id - id of the tenant to add or delete
+    @param context_id - context id for this tenant (optional, default to same as tenant id)
+    @param is_del - true to delete the tenant, false to add it
+*/
+
+autoreply define sfdp_tenant_add_del
+{
+  u32 client_index;
+  u32 context;
+
+  u32 tenant_id;
+  u32 context_id [default=0xffffffff];
+  bool is_del;
+};
+
+/** \brief Set the services of a tenant for one session direction
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tenant_id - id of the tenant to configure
+    @param dir - session direction the services apply to
+    @param n_services - number of entries in services
+    @param services - names of the services
+*/
+autoreply define sfdp_set_services
+{
+  u32 client_index;
+  u32 context;
+
+  u32 tenant_id;
+  vl_api_sfdp_session_direction_t dir;
+  u8 n_services;
+  vl_api_sfdp_service_name_t services[n_services];
+};
+
+/** \brief Dump SFDP sessions; answered with sfdp_session_details messages
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define sfdp_session_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Details of one SFDP session
+    @param context - sender context, to match reply w/ request
+    @param session_id - session identifier
+    @param thread_index - thread index of the session
+    @param tenant_id - tenant id of the session
+    @param session_idx - session index
+    @param session_type - session type
+    @param protocol - IP protocol of the session
+    @param state - session state
+    @param remaining_time - remaining time of the session (presumably until
+                            expiry, in seconds - to be confirmed)
+    @param forward_bitmap - service bitmap of the forward direction
+    @param reverse_bitmap - service bitmap of the reverse direction
+    @param n_keys - number of entries in keys
+    @param keys - session keys
+*/
+define sfdp_session_details
+{
+  u32 context;
+
+  u64 session_id;
+  u32 thread_index;
+  u32 tenant_id;
+  u32 session_idx;
+  vl_api_sfdp_session_type_t session_type;
+  vl_api_ip_proto_t protocol;
+  vl_api_sfdp_session_state_t state;
+  f64 remaining_time;
+/* Avoid service strings to keep it compact */
+  u64 forward_bitmap;
+  u64 reverse_bitmap;
+  u8 n_keys;
+  vl_api_sfdp_session_key_t keys[n_keys];
+};
+
+/** \brief Dump SFDP tenants; answered with sfdp_tenant_details messages
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+*/
+define sfdp_tenant_dump
+{
+  u32 client_index;
+  u32 context;
+};
+
+/** \brief Details of one SFDP tenant
+    @param context - sender context, to match reply w/ request
+    @param index - tenant index
+    @param context_id - context id of the tenant
+    @param forward_bitmap - service bitmap of the forward direction
+    @param reverse_bitmap - service bitmap of the reverse direction
+    @param n_timeout - number of entries in timeout
+    @param timeout - timeout values of the tenant
+*/
+define sfdp_tenant_details
+{
+  u32 context;
+
+  u32 index;
+  u32 context_id;
+  u64 forward_bitmap;
+  u64 reverse_bitmap;
+  u32 n_timeout;
+  u32 timeout[n_timeout];
+};
+
+/** \brief Set one timeout of a tenant
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tenant_id - id of the tenant to configure
+    @param timeout_id - which timeout to set
+    @param timeout_value - new value of the timeout
+*/
+autoreply define sfdp_set_timeout
+{
+  u32 client_index;
+  u32 context;
+
+  u32 tenant_id;
+  u32 timeout_id;
+  u32 timeout_value;
+};
+
+/** \brief Set one slow path node of a tenant
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tenant_id - id of the tenant to configure
+    @param sp_node - which slow path node to set
+    @param node_index - index of the graph node to use
+*/
+autoreply define sfdp_set_sp_node
+{
+  u32 client_index;
+  u32 context;
+
+  u32 tenant_id;
+  vl_api_sfdp_sp_node_t sp_node;
+  u32 node_index;
+};
+
+/** \brief Set the ICMP error node of a tenant
+    @param client_index - opaque cookie to identify the sender
+    @param context - sender context, to match reply w/ request
+    @param tenant_id - id of the tenant to configure
+    @param is_ip6 - true to set the IPv6 node, false for the IPv4 node
+    @param node_index - index of the graph node to use
+*/
+autoreply define sfdp_set_icmp_error_node
+{
+  u32 client_index;
+  u32 context;
+
+  u32 tenant_id;
+  bool is_ip6;
+  u32 node_index;
+};
+
+/*
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/vnet/sfdp/sfdp.c b/src/vnet/sfdp/sfdp.c
new file mode 100644 (file)
index 0000000..8d24306
--- /dev/null
@@ -0,0 +1,550 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#define _GNU_SOURCE
+#include <sys/mman.h>
+
+#include <vppinfra/bihash_24_8.h>
+/*
+ * Not needed because instantiated in ip6_fib.c
+ * #include <vppinfra/bihash_template.c>
+ */
+#undef __included_bihash_template_inlines_h__
+#include <vppinfra/bihash_template_inlines.h>
+
+#include <vppinfra/bihash_32_8.h>
+#include <vppinfra/bihash_template.c>
+
+#include <vppinfra/bihash_40_8.h>
+/*
+ * Not needed because instantiated in ip6_forward.c
+ * #include <vppinfra/bihash_template.c>
+ */
+#undef __included_bihash_template_inlines_h__
+#include <vppinfra/bihash_template_inlines.h>
+
+#include <vppinfra/bihash_48_8.h>
+/*
+ * Not needed because instantiated in session_lookup.c
+ * #include <vppinfra/bihash_template.c>
+ */
+#undef __included_bihash_template_inlines_h__
+#include <vppinfra/bihash_template_inlines.h>
+
+#include <vppinfra/bihash_56_8.h>
+#include <vppinfra/bihash_template.c>
+
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/lookup_inlines.h>
+#include <vnet/sfdp/service.h>
+#include <vnet/sfdp/timer/timer.h>
+#include <vnet/plugin/plugin.h>
+#include <vnet/vnet.h>
+#include <vnet/ip/reass/ip4_sv_reass.h>
+#include <vnet/ip/reass/ip6_sv_reass.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vnet/sfdp/service.h>
+/* Service bitmap given to both directions of a newly created tenant: the
+ * mask of the "drop" service */
+#define SFDP_DEFAULT_BITMAP SFDP_SERVICE_MASK (drop)
+
+SFDP_SERVICE_DECLARE (drop)
+
+/* Global SFDP state */
+sfdp_main_t sfdp_main;
+
+static void
+sfdp_init_ptd_counters ()
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+#define _(x, y)                                                               \
+  u8 *name = format (0, y "%c", 0);                                           \
+  u8 *stat_seg_name = format (0, "/sfdp/per_flow_counters/" y "%c", 0);       \
+  sfdp->per_session_ctr[SFDP_FLOW_COUNTER_##x].name = (char *) name;          \
+  sfdp->per_session_ctr[SFDP_FLOW_COUNTER_##x].stat_segment_name =            \
+    (char *) stat_seg_name;                                                   \
+  vlib_validate_combined_counter (                                            \
+    &sfdp->per_session_ctr[SFDP_FLOW_COUNTER_##x],                            \
+    1ULL << (sfdp->log2_sessions + 1));
+
+  foreach_sfdp_flow_counter
+#undef _
+}
+
+/*
+ * Name and size the per-tenant counters: simple counters for the entries of
+ * foreach_sfdp_tenant_session_counter, combined counters for the entries of
+ * foreach_sfdp_tenant_data_counter. All of them are published under
+ * "/sfdp/per_tenant_counters/" and validated up to index
+ * 2^(log2_tenants + 1).
+ */
+static void
+sfdp_init_tenant_counters (sfdp_main_t *sfdp)
+{
+#define _(x, y, z)                                                            \
+  sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x].name = y;         \
+  sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x]                   \
+    .stat_segment_name = "/sfdp/per_tenant_counters/" y;                      \
+  vlib_validate_simple_counter (                                              \
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x],               \
+    1ULL << (1 + sfdp->log2_tenants));
+
+  foreach_sfdp_tenant_session_counter
+#undef _
+#define _(x, y, z)                                                            \
+  sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].name = y;               \
+  sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].stat_segment_name =     \
+    "/sfdp/per_tenant_counters/" y;                                           \
+  vlib_validate_combined_counter (                                            \
+    &sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x],                     \
+    1ULL << (1 + sfdp->log2_tenants));
+
+    foreach_sfdp_tenant_data_counter
+#undef _
+}
+
+/*
+ * Lazily allocate the SFDP data structures: per-thread data, session and
+ * tenant pools, counters, lookup tables and the ICMP error frame queues.
+ * Only the first call does any work; later calls return immediately.
+ *
+ * Session ids are laid out, from most to least significant bits, as
+ * [ epoch | thread index (log_n_thread bits) | per-thread counter ].
+ */
+static void
+sfdp_init_main_if_needed (sfdp_main_t *sfdp)
+{
+  static u32 done = 0;
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  if (done)
+    return;
+  time_t epoch = time (NULL);
+  uword log_n_thread = max_log2 (tm->n_vlib_mains);
+  /* Number of bits left for the per-thread counter */
+  uword template_shift =
+    SFDP_SESSION_ID_TOTAL_BITS - SFDP_SESSION_ID_EPOCH_N_BITS - log_n_thread;
+  sfdp->session_id_ctr_mask = (((u64) 1 << template_shift) - 1);
+  /* initialize per-thread data */
+  vec_validate (sfdp->per_thread_data, tm->n_vlib_mains - 1);
+  for (int i = 0; i < tm->n_vlib_mains; i++)
+    {
+      sfdp_per_thread_data_t *ptd =
+       vec_elt_at_index (sfdp->per_thread_data, i);
+      ptd->expired_sessions = 0;
+      /* Epoch bits that do not fit in the u64 are shifted out */
+      ptd->session_id_template = (u64) epoch
+                                << (template_shift + log_n_thread);
+      ptd->session_id_template |= (u64) i << template_shift;
+      ptd->session_freelist = 0;
+    }
+  /* The session lock is only needed when worker threads exist */
+  if (vlib_num_workers ())
+    clib_spinlock_init (&sfdp->session_lock);
+
+  pool_init_fixed (sfdp->sessions, sfdp_num_sessions ());
+  sfdp->free_sessions = sfdp_num_sessions ();
+  sfdp_init_ptd_counters ();
+  pool_init_fixed (sfdp->tenants, 1ULL << sfdp->log2_tenants);
+
+  sfdp_init_tenant_counters (sfdp);
+
+  clib_bihash_init_24_8 (&sfdp->table4, "sfdp ipv4 session table",
+                        sfdp_ip4_num_buckets (), sfdp_ip4_mem_size ());
+  clib_bihash_init_48_8 (&sfdp->table6, "sfdp ipv6 session table",
+                        sfdp_ip6_num_buckets (), sfdp_ip6_mem_size ());
+  clib_bihash_init_8_8 (&sfdp->tenant_idx_by_id, "sfdp tenant table",
+                       sfdp_tenant_num_buckets (), sfdp_tenant_mem_size ());
+  /* NOTE(review): sized with the IPv4 session table parameters - confirm
+   * this is intended for the id -> index table */
+  clib_bihash_init_8_8 (&sfdp->session_index_by_id, "session idx by id",
+                       sfdp_ip4_num_buckets (), sfdp_ip4_mem_size ());
+
+  sfdp->icmp4_error_frame_queue_index =
+    vlib_frame_queue_main_init (sfdp_lookup_ip4_icmp_node.index, 0);
+  sfdp->icmp6_error_frame_queue_index =
+    vlib_frame_queue_main_init (sfdp_lookup_ip6_icmp_node.index, 0);
+
+  /* Use the timer module as default expiry module if no other one has
+   * been registered yet. */
+  if (!sfdp->expiry_callbacks.expire_or_evict_sessions)
+    {
+      sfdp_timer_register_as_expiry_module ();
+    }
+
+  done = 1;
+}
+
+/*
+ * SFDP init function: fill in defaults for the sizing parameters that the
+ * early config left at 0 and validate the eviction margin.
+ *
+ * Returns the error of sfdp_set_eviction_sessions_margin (), 0 on success.
+ */
+static clib_error_t *
+sfdp_init (vlib_main_t *vm)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  clib_error_t *err;
+  /* Must be sampled before log2_sessions is defaulted below */
+  u8 sessions_defaulted = sfdp->log2_sessions == 0;
+#define _(val, default) sfdp->val = sfdp->val ? sfdp->val : default;
+
+  _ (log2_sessions, SFDP_DEFAULT_LOG2_SESSIONS)
+  /* When the number of sessions was configured explicitly, sfdp_config
+   * has already derived the per-thread cache size from it, and 0 is a
+   * deliberate value there (no caching for very small session counts).
+   * Only apply the default when the session count itself is defaulted. */
+  if (sessions_defaulted)
+    {
+      _ (log2_sessions_cache_per_thread,
+         SFDP_DEFAULT_LOG2_SESSIONS - SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO)
+    }
+  _ (log2_tenants, SFDP_DEFAULT_LOG2_TENANTS)
+#undef _
+  /* no-main only makes sense when worker threads exist */
+  sfdp->no_main = sfdp->no_main && vlib_num_workers ();
+
+  /* sfdp->eviction_sessions_margin came from early_config */
+  if ((err = sfdp_set_eviction_sessions_margin (
+         sfdp->eviction_sessions_margin)) != 0)
+    return err;
+
+  // vlib_call_init_function (vm, sfdp_service_init);
+  return 0;
+}
+
+/*
+ * Zero every per-tenant session (simple) and data (combined) counter at
+ * index tenant_idx.
+ *
+ * NOTE(review): each expansion also re-assigns the counter name and stat
+ * segment name with the same values sfdp_init_tenant_counters () sets;
+ * that looks redundant - confirm before removing.
+ */
+void
+sfdp_tenant_clear_counters (sfdp_main_t *sfdp, u32 tenant_idx)
+{
+#define _(x, y, z)                                                            \
+  sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x].name = y;         \
+  sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x]                   \
+    .stat_segment_name = "/sfdp/per_tenant_counters/" y;                      \
+  vlib_zero_simple_counter (                                                  \
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_##x], tenant_idx);
+
+  foreach_sfdp_tenant_session_counter
+#undef _
+#define _(x, y, z)                                                            \
+  sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].name = y;               \
+  sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x].stat_segment_name =     \
+    "/sfdp/per_tenant_counters/" y;                                           \
+  vlib_zero_combined_counter (                                                \
+    &sfdp->tenant_data_ctr[SFDP_TENANT_DATA_COUNTER_##x], tenant_idx);
+
+    foreach_sfdp_tenant_data_counter
+#undef _
+}
+
+/* Copy the global default value of every timeout into the tenant */
+static void
+sfdp_tenant_init_timeouts (sfdp_tenant_t *tenant)
+{
+  u32 timeout_id = 0;
+
+  while (timeout_id < SFDP_MAX_TIMEOUTS)
+    {
+      tenant->timeouts[timeout_id] = sfdp_main.timeouts[timeout_id].val;
+      timeout_id++;
+    }
+}
+
+/*
+ * Point every slow path node of the tenant at its default graph node, as
+ * listed (by node name) in foreach_sfdp_sp_node.
+ *
+ * NOTE(review): the result of vlib_get_node_by_name () is dereferenced
+ * without a check, so every default name must refer to an existing node.
+ */
+static void
+sfdp_tenant_init_sp_nodes (sfdp_tenant_t *tenant)
+{
+  vlib_main_t *vm = vlib_get_main ();
+  vlib_node_t *node;
+
+#define _(sym, default, str)                                                  \
+  node = vlib_get_node_by_name (vm, (u8 *) (default));                        \
+  tenant->sp_node_indices[SFDP_SP_NODE_##sym] = node->index;
+
+  foreach_sfdp_sp_node
+#undef _
+}
+
+/*
+ * Create or delete a tenant.
+ *
+ * sfdp       - SFDP main structure
+ * tenant_id  - user visible id of the tenant
+ * context_id - context id stored in a newly created tenant
+ * is_del     - non-zero to delete, zero to create
+ *
+ * A new tenant starts with the default service bitmap in both directions,
+ * the global default timeouts, the default slow path nodes and zeroed
+ * counters. Session expiry is enabled when the first tenant is created
+ * and disabled when the last one is deleted.
+ *
+ * Returns 0 on success, or an error when creating a tenant whose id
+ * already exists, when the tenant pool is full, or when deleting an
+ * unknown tenant.
+ */
+clib_error_t *
+sfdp_tenant_add_del (sfdp_main_t *sfdp, u32 tenant_id, u32 context_id,
+                    u8 is_del)
+{
+  sfdp_init_main_if_needed (sfdp);
+  clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 };
+  clib_error_t *err = 0;
+  sfdp_tenant_t *tenant;
+  u32 tenant_idx;
+  u32 n_tenants = pool_elts (sfdp->tenants);
+  if (!is_del)
+    {
+      if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv) == 0)
+        {
+          /* kv.value is 64 bits wide, narrow it explicitly for %u */
+          err = clib_error_return (0,
+                                   "Can't create tenant with id %u"
+                                   " (already exists with index %u)",
+                                   tenant_id, (u32) kv.value);
+        }
+      else if (n_tenants >= (1ULL << sfdp->log2_tenants))
+        {
+          /* The tenant pool is a fixed-size pool of 2^log2_tenants
+           * elements and cannot grow: refuse instead of calling pool_get
+           * on a full pool. */
+          err = clib_error_return (0,
+                                   "Can't create tenant with id %u"
+                                   " (tenant table is full)",
+                                   tenant_id);
+        }
+      else
+        {
+          pool_get (sfdp->tenants, tenant);
+          tenant_idx = tenant - sfdp->tenants;
+          tenant->bitmaps[SFDP_FLOW_FORWARD] = SFDP_DEFAULT_BITMAP;
+          tenant->bitmaps[SFDP_FLOW_REVERSE] = SFDP_DEFAULT_BITMAP;
+          tenant->tenant_id = tenant_id;
+          tenant->context_id = context_id;
+          sfdp_tenant_init_timeouts (tenant);
+          sfdp_tenant_init_sp_nodes (tenant);
+          kv.key = tenant_id;
+          kv.value = tenant_idx;
+          clib_bihash_add_del_8_8 (&sfdp->tenant_idx_by_id, &kv, 1);
+          sfdp_tenant_clear_counters (sfdp, tenant_idx);
+        }
+    }
+  else
+    {
+      if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv))
+        {
+          err = clib_error_return (0,
+                                   "Can't delete tenant with id %u"
+                                   " (not found)",
+                                   tenant_id);
+        }
+      else
+        {
+          sfdp_tenant_clear_counters (sfdp, kv.value);
+          pool_put_index (sfdp->tenants, kv.value);
+          clib_bihash_add_del_8_8 (&sfdp->tenant_idx_by_id, &kv, 0);
+          /* TODO: Notify other users of "tenants" (like gw)?
+           * maybe cb list? */
+        }
+    }
+  /* n_tenants was sampled before the operation: toggle expiry when the
+   * first tenant appears or the last one goes away. */
+  if (!err && ((n_tenants == 1 && is_del) || (n_tenants == 0 && !is_del)))
+    sfdp_enable_disable_expiry (is_del);
+  return err;
+}
+
+/*
+ * Replace the service bitmap of a tenant for one direction.
+ *
+ * Returns 0 on success, an error if no tenant has id tenant_id.
+ *
+ * NOTE(review): direction indexes tenant->bitmaps without a range check;
+ * callers are expected to pass SFDP_FLOW_FORWARD or SFDP_FLOW_REVERSE.
+ */
+clib_error_t *
+sfdp_set_services (sfdp_main_t *sfdp, u32 tenant_id, sfdp_bitmap_t bitmap,
+                  u8 direction)
+{
+  sfdp_init_main_if_needed (sfdp);
+  clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 };
+  sfdp_tenant_t *tenant;
+  if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv))
+    return clib_error_return (
+      0, "Can't assign service map: tenant id %u not found", tenant_id);
+
+  tenant = sfdp_tenant_at_index (sfdp, kv.value);
+  tenant->bitmaps[direction] = bitmap;
+  return 0;
+}
+
+/*
+ * Set one timeout value of a tenant.
+ *
+ * Returns 0 on success, an error if timeout_idx is not below
+ * SFDP_MAX_TIMEOUTS or if no tenant has id tenant_id.
+ */
+clib_error_t *
+sfdp_set_timeout (sfdp_main_t *sfdp, u32 tenant_id, u32 timeout_idx,
+                 u32 timeout_val)
+{
+  sfdp_init_main_if_needed (sfdp);
+  clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 };
+  sfdp_tenant_t *tenant;
+
+  /* timeout_idx indexes tenant->timeouts, reject out-of-range values
+   * rather than writing past the table */
+  if (timeout_idx >= SFDP_MAX_TIMEOUTS)
+    return clib_error_return (
+      0, "Can't configure timeout: invalid timeout index %u", timeout_idx);
+
+  if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv))
+    return clib_error_return (
+      0, "Can't configure timeout: tenant id %u not found", tenant_id);
+  tenant = sfdp_tenant_at_index (sfdp, kv.value);
+  tenant->timeouts[timeout_idx] = timeout_val;
+  return 0;
+}
+
+/*
+ * Set the graph node used for one slow path of a tenant.
+ *
+ * Returns 0 on success, an error if no tenant has id tenant_id.
+ *
+ * NOTE(review): sp_index indexes tenant->sp_node_indices without a range
+ * check and node_index is stored as is; callers must validate both.
+ */
+clib_error_t *
+sfdp_set_sp_node (sfdp_main_t *sfdp, u32 tenant_id, u32 sp_index,
+                 u32 node_index)
+{
+  sfdp_init_main_if_needed (sfdp);
+  clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 };
+  sfdp_tenant_t *tenant;
+  if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv))
+    return clib_error_return (
+      0, "Can't configure slow path node: tenant id %u not found", tenant_id);
+  tenant = sfdp_tenant_at_index (sfdp, kv.value);
+  tenant->sp_node_indices[sp_index] = node_index;
+  return 0;
+}
+
+/*
+ * Select the node that follows the SFDP ICMP error lookup node for a
+ * tenant. node_index is added as a next node of the IPv6 (is_ip6 != 0) or
+ * IPv4 ICMP lookup node and the resulting next index is stored in the
+ * tenant.
+ *
+ * Returns 0 on success, an error if no tenant has id tenant_id.
+ */
+clib_error_t *
+sfdp_set_icmp_error_node (sfdp_main_t *sfdp, u32 tenant_id, u8 is_ip6,
+                         u32 node_index)
+{
+  sfdp_init_main_if_needed (sfdp);
+  vlib_main_t *vm = vlib_get_main ();
+  clib_bihash_kv_8_8_t kv = { .key = tenant_id, .value = 0 };
+  sfdp_tenant_t *tenant;
+  uword next_index;
+  if (clib_bihash_search_inline_8_8 (&sfdp->tenant_idx_by_id, &kv))
+    return clib_error_return (
+      0, "Can't configure icmp error node: tenant id %u not found", tenant_id);
+  tenant = sfdp_tenant_at_index (sfdp, kv.value);
+  if (is_ip6)
+    {
+      next_index =
+        vlib_node_add_next (vm, sfdp_lookup_ip6_icmp_node.index, node_index);
+      tenant->icmp6_lookup_next = next_index;
+    }
+  else
+    {
+      next_index =
+        vlib_node_add_next (vm, sfdp_lookup_ip4_icmp_node.index, node_index);
+      tenant->icmp4_lookup_next = next_index;
+    }
+  return 0;
+}
+
+/* Create a session in scope 0; see
+ * sfdp_create_session_with_scope_index () */
+int
+sfdp_create_session (vlib_main_t *vm, vlib_buffer_t *b, u32 context_id,
+                    u32 thread_index, u32 tenant_index, u32 *session_index,
+                    int is_ipv6)
+{
+  const u32 default_scope_index = 0;
+
+  return sfdp_create_session_with_scope_index (
+    vm, b, context_id, thread_index, tenant_index, session_index,
+    default_scope_index, is_ipv6);
+}
+
+/*
+ * Create a session for the flow of buffer b in the given scope.
+ *
+ * The lookup key is computed from the buffer (IPv6 or IPv4 depending on
+ * is_ipv6) and handed to sfdp_create_session_inline (). When thread_index
+ * is SFDP_UNBOUND_THREAD_INDEX no per-thread data is passed and
+ * *session_index is left untouched; otherwise, on success, *session_index
+ * receives the new session index and the new-session notification is sent.
+ *
+ * Returns the result of sfdp_create_session_inline ().
+ */
+int
+sfdp_create_session_with_scope_index (vlib_main_t *vm, vlib_buffer_t *b,
+                                     u32 context_id, u32 thread_index,
+                                     u32 tenant_index, u32 *session_index,
+                                     u32 scope_index, int is_ipv6)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_session_ip4_key_t k4 = {};
+  sfdp_session_ip6_key_t k6 = {};
+  void *key;
+  u64 lookup_val = 0, hash = 0;
+  i16 l4_hdr_offset = 0;
+  u8 slow_path = 0;
+  sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, tenant_index);
+  sfdp_per_thread_data_t *ptd = 0;
+  f64 time_now = vlib_time_now (vm);
+  u8 bound_to_thread = (u16) thread_index != SFDP_UNBOUND_THREAD_INDEX;
+  int rv;
+
+  if (bound_to_thread)
+    ptd = vec_elt_at_index (sfdp->per_thread_data, thread_index);
+
+  if (!is_ipv6)
+    {
+      key = (void *) &k4;
+      sfdp_calc_key_v4 (b, context_id, key, &lookup_val, &hash,
+                        &l4_hdr_offset, slow_path);
+    }
+  else
+    {
+      key = (void *) &k6;
+      sfdp_calc_key_v6 (b, context_id, key, &lookup_val, &hash,
+                        &l4_hdr_offset, slow_path);
+    }
+
+  rv = sfdp_create_session_inline (sfdp, ptd, tenant, tenant_index,
+                                   thread_index, time_now, key, &hash,
+                                   &lookup_val, scope_index, is_ipv6);
+
+  if (rv != 0 || !bound_to_thread)
+    return rv;
+
+  *session_index = sfdp_session_index_from_lookup (lookup_val);
+  sfdp_notify_new_sessions (sfdp, session_index, 1);
+  return rv;
+}
+
+/*
+ * Copy key number key_idx of an IPv4 session into result, with the ports
+ * converted by clib_net_to_host_u16 (). For UDP, TCP and ICMP sessions
+ * whose pseudo direction is set for that key, the lo and hi address/port
+ * pairs are swapped.
+ */
+void
+sfdp_normalise_ip4_key (sfdp_session_t *session,
+                       sfdp_session_ip4_key_t *result, u8 key_idx)
+{
+  sfdp_session_ip4_key_t *skey = &session->keys[key_idx].key4;
+  sfdp_ip4_key_t *key = &skey->ip4_key;
+  u8 pseudo_dir = session->pseudo_dir[key_idx];
+  u8 proto = session->proto;
+  u8 with_port = proto == IP_PROTOCOL_UDP || proto == IP_PROTOCOL_TCP ||
+                 proto == IP_PROTOCOL_ICMP;
+
+  /* Start from a verbatim copy, the fields below are then rewritten */
+  result->ip4_key.as_u64x2 = key->as_u64x2;
+  result->as_u64 = skey->as_u64;
+
+  if (!with_port || !pseudo_dir)
+    {
+      /* Keep lo/hi as stored */
+      result->ip4_key.ip_addr_lo = key->ip_addr_lo;
+      result->ip4_key.port_lo = clib_net_to_host_u16 (key->port_lo);
+      result->ip4_key.ip_addr_hi = key->ip_addr_hi;
+      result->ip4_key.port_hi = clib_net_to_host_u16 (key->port_hi);
+      return;
+    }
+
+  /* Swap lo and hi */
+  result->ip4_key.ip_addr_lo = key->ip_addr_hi;
+  result->ip4_key.port_lo = clib_net_to_host_u16 (key->port_hi);
+  result->ip4_key.ip_addr_hi = key->ip_addr_lo;
+  result->ip4_key.port_hi = clib_net_to_host_u16 (key->port_lo);
+}
+
+/*
+ * Copy key number key_idx of an IPv6 session into result, with the ports
+ * converted by clib_net_to_host_u16 (). For UDP, TCP and ICMP sessions
+ * whose pseudo direction is set for that key, the lo and hi address/port
+ * pairs are swapped.
+ */
+void
+sfdp_normalise_ip6_key (sfdp_session_t *session,
+                       sfdp_session_ip6_key_t *result, u8 key_idx)
+{
+  sfdp_session_ip6_key_t *skey = &session->keys[key_idx].key6;
+  sfdp_ip6_key_t *key = &skey->ip6_key;
+  u8 pseudo_dir = session->pseudo_dir[key_idx];
+  u8 proto = session->proto;
+  u8 with_port = proto == IP_PROTOCOL_UDP || proto == IP_PROTOCOL_TCP ||
+                 proto == IP_PROTOCOL_ICMP;
+
+  /* Start from a verbatim copy, the fields below are then rewritten */
+  result->ip6_key.as_u64x4 = key->as_u64x4;
+  result->as_u64 = skey->as_u64;
+
+  if (!with_port || !pseudo_dir)
+    {
+      /* Keep lo/hi as stored */
+      result->ip6_key.ip6_addr_lo = key->ip6_addr_lo;
+      result->ip6_key.port_lo = clib_net_to_host_u16 (key->port_lo);
+      result->ip6_key.ip6_addr_hi = key->ip6_addr_hi;
+      result->ip6_key.port_hi = clib_net_to_host_u16 (key->port_hi);
+      return;
+    }
+
+  /* Swap lo and hi */
+  result->ip6_key.ip6_addr_lo = key->ip6_addr_hi;
+  result->ip6_key.port_lo = clib_net_to_host_u16 (key->port_hi);
+  result->ip6_key.ip6_addr_hi = key->ip6_addr_lo;
+  result->ip6_key.port_hi = clib_net_to_host_u16 (key->port_lo);
+}
+
+/* Out-of-line wrapper around clib_bihash_add_del_inline_with_hash_24_8 ()
+ * with its four trailing arguments set to 0 */
+int
+sfdp_bihash_add_del_inline_with_hash_24_8 (clib_bihash_24_8_t *h,
+                                          clib_bihash_kv_24_8_t *kv, u64 hash,
+                                          u8 is_add)
+{
+  int rv;
+
+  rv = clib_bihash_add_del_inline_with_hash_24_8 (h, kv, hash, is_add, 0, 0,
+                                                  0, 0);
+  return rv;
+}
+
+/* Out-of-line wrapper around clib_bihash_add_del_inline_with_hash_48_8 ()
+ * with its four trailing arguments set to 0 */
+int
+sfdp_bihash_add_del_inline_with_hash_48_8 (clib_bihash_48_8_t *h,
+                                          clib_bihash_kv_48_8_t *kv, u64 hash,
+                                          u8 is_add)
+{
+  int rv;
+
+  rv = clib_bihash_add_del_inline_with_hash_48_8 (h, kv, hash, is_add, 0, 0,
+                                                  0, 0);
+  return rv;
+}
+
+/*
+ * Early config handler of the "sfdp" startup section.
+ *
+ * Accepted options: sessions-log2 <n>, sessions-per-thread-cache-log2 <n>,
+ * tenants-log2 <n>, eviction-sessions-margin <n> and no-main (ignored with
+ * a warning when there are no workers).
+ *
+ * When no per-thread cache size is given it is derived from the session
+ * count, or set to 0 when the session count is too small.
+ *
+ * Returns 0 on success, an error naming the offending input otherwise.
+ */
+static clib_error_t *
+sfdp_config (vlib_main_t *vm, unformat_input_t *input)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  u32 eviction_sessions_margin = ~0;
+  u8 sessions_cache_specified = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "sessions-log2 %u", &sfdp->log2_sessions))
+        ;
+      else if (unformat (input, "sessions-per-thread-cache-log2 %u",
+                         &sfdp->log2_sessions_cache_per_thread))
+        {
+          sessions_cache_specified = 1;
+        }
+      else if (unformat (input, "tenants-log2 %u", &sfdp->log2_tenants))
+        ;
+      else if (unformat (input, "eviction-sessions-margin %u",
+                         &eviction_sessions_margin))
+        ;
+      else if (unformat (input, "no-main"))
+        {
+          /* Disable only if there are workers */
+          if (vlib_num_workers ())
+            sfdp->no_main = 1;
+          else
+            clib_warning ("Ignoring no-main option: no workers");
+        }
+      else
+        {
+          /* Tell the operator which token was rejected */
+          return clib_error_return (
+            0, "Invalid SFDP plugin config: unknown input '%U'",
+            format_unformat_error, input);
+        }
+    }
+
+  if (!sessions_cache_specified)
+    {
+      if (sfdp->log2_sessions > SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO + 4)
+        {
+          sfdp->log2_sessions_cache_per_thread =
+            sfdp->log2_sessions - SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO;
+        }
+      else
+        {
+          /* If the total number of sessions is really small (can happen in
+           * tests) we don't use session caching by default to protect against
+           * exhaustion. */
+          sfdp->log2_sessions_cache_per_thread = 0;
+        }
+    }
+
+  /* ~0 when the option was not given */
+  sfdp->eviction_sessions_margin = eviction_sessions_margin;
+
+  return 0;
+}
+
+/* sfdp { [sessions-log2 <n>] [sessions-per-thread-cache-log2 <n>]
+ *        [tenants-log2 <n>] [eviction-sessions-margin <n>] [no-main]
+ * } config. */
+VLIB_EARLY_CONFIG_FUNCTION (sfdp_config, "sfdp");
+
+VLIB_INIT_FUNCTION (sfdp_init);
+
+VLIB_PLUGIN_REGISTER () = {
+  .version = SFDP_CORE_PLUGIN_BUILD_VER,
+  .description = "sfdp Core Plugin",
+};
diff --git a/src/vnet/sfdp/sfdp.h b/src/vnet/sfdp/sfdp.h
new file mode 100644 (file)
index 0000000..080df28
--- /dev/null
@@ -0,0 +1,891 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_h__
+#define __included_sfdp_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ethernet/ethernet.h>
+
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vppinfra/bihash_24_8.h>
+#include <vppinfra/bihash_48_8.h>
+#include <vppinfra/bihash_8_8.h>
+
+#include <vppinfra/format_table.h>
+
+#include <vnet/sfdp/expiry/expiry.h>
+#include <vnet/sfdp/common.h>
+#include <vnet/sfdp/callbacks.h>
+
+/* Sessions constants */
+#define SFDP_DEFAULT_LOG2_SESSIONS 19 /* 500k sessions */
+#define SFDP_DEFAULT_LOG2_SESSIONS_CACHE_RATIO                                \
+  7                                 /* 1/128 cached sessions per thread */
+#define SFDP_LOG2_MEM_PER_SESSION 12 /* 4kB per session */
+
+/* Tenants constants */
+#define SFDP_DEFAULT_LOG2_TENANTS 15 /* 32k tenants */
+#define SFDP_LOG2_MEM_PER_TENANT  6  /* 64B per tenant */
+
+#define SFDP_SESSION_ID_TOTAL_BITS   64
+#define SFDP_SESSION_ID_EPOCH_N_BITS 16
+
+#define SFDP_BITMAP_SIZE                       64
+/* Maps a scope index to a lookup next index by offsetting it by
+ * SFDP_BITMAP_SIZE. The argument is parenthesised so that compound
+ * expressions (e.g. `a & b` or `c ? x : y`) expand as a single operand. */
+#define SFDP_LOOKUP_NEXT_INDEX_FOR_SCOPE(scope) ((scope) + SFDP_BITMAP_SIZE)
+
+/* Convention session_index is 31 bit
+ * Flow_index (embedded in vlib_buffer_t as "flow_id")
+ * Flow_index = (session_index << 1) + !(is_forward)
+
+ * A flow is "forward" if it's going from initiator to responder
+ * The packet_direction is 1 if normalisation happened 0 otherwise
+ * the stored_direction of a flow is the packet direction of its FSOL
+ * Pseudo_flow_index = (session_index << 1) + stored_direction
+ *
+ * Note that for a packet belonging to a flow
+ * ----------------------------------------------------------
+ *     !(is_forward) = packet_direction ^ stored_direction
+ *        Flow_index = Pseudo_flow_index ^ stored_direction
+ * ----------------------------------------------------------
+ */
+
+typedef enum
+{
+  SFDP_SESSION_TYPE_IP4,
+  SFDP_SESSION_TYPE_IP6,
+  SFDP_SESSION_TYPE_USER,
+  /* last */
+  SFDP_SESSION_N_TYPES,
+} sfdp_session_type_t;
+
+#define foreach_sfdp_session_state                                            \
+  _ (FSOL, "embryonic")                                                       \
+  _ (ESTABLISHED, "established")                                              \
+  _ (TIME_WAIT, "time-wait")                                                  \
+  /* Free session does not belong to main pool anymore, but is unused */      \
+  _ (FREE, "free")
+
+typedef enum
+{
+#define _(val, str) SFDP_SESSION_STATE_##val,
+  foreach_sfdp_session_state
+#undef _
+    SFDP_SESSION_N_STATE
+} sfdp_session_state_t;
+
+#define foreach_sfdp_flow_counter _ (LOOKUP, "lookup")
+
+typedef enum
+{
+#define _(x, y) SFDP_FLOW_COUNTER_##x,
+  foreach_sfdp_flow_counter
+#undef _
+    SFDP_FLOW_N_COUNTER
+} sfdp_flow_counter_index_t;
+
+#define foreach_sfdp_tenant_session_counter                                   \
+  _ (CREATED, "created", "created sessions")                                  \
+  _ (REMOVED, "removed", "removed sessions")
+
+#define foreach_sfdp_tenant_data_counter                                      \
+  _ (INCOMING, "incoming", "incoming data into tenant")                       \
+  _ (OUTGOING, "outgoing", "outgoing data out of tenant")
+
+typedef enum
+{
+#define _(x, y, z) SFDP_TENANT_SESSION_COUNTER_##x,
+  foreach_sfdp_tenant_session_counter
+#undef _
+    SFDP_TENANT_SESSION_N_COUNTER
+} sfdp_tenant_session_counter_index_t;
+
+typedef enum
+{
+#define _(x, y, z) SFDP_TENANT_DATA_COUNTER_##x,
+  foreach_sfdp_tenant_data_counter
+#undef _
+    SFDP_TENANT_DATA_N_COUNTER
+} sfdp_tenant_data_counter_index_t;
+
+enum
+{
+  SFDP_FLOW_FORWARD = 0,
+  SFDP_FLOW_REVERSE = 1,
+  SFDP_FLOW_F_B_N = 2
+};
+
+enum
+{
+  SFDP_SESSION_KEY_PRIMARY,
+  SFDP_SESSION_KEY_SECONDARY,
+  SFDP_SESSION_N_KEY
+};
+/* Flags to determine key validity in the session */
+#define foreach_sfdp_session_key_flag                                         \
+  _ (PRIMARY_VALID_IP4, 0x1, "primary-valid-ip4")                             \
+  _ (PRIMARY_VALID_IP6, 0x2, "primary-valid-ip6")                             \
+  _ (SECONDARY_VALID_IP4, 0x4, "secondary-valid-ip4")                         \
+  _ (SECONDARY_VALID_IP6, 0x8, "secondary-valid-ip6")                         \
+  _ (PRIMARY_VALID_USER, 0x10, "primary-valid-user")                          \
+  _ (SECONDARY_VALID_USER, 0x20, "secondary-valid-user")
+
+enum
+{
+#define _(x, n, s) SFDP_SESSION_KEY_FLAG_##x = n,
+  foreach_sfdp_session_key_flag
+#undef _
+};
+
+/* SP node table; entries are _ (name, val, str). The enum that follows only
+ * uses `name` (SFDP_SP_NODE_<name>); how val/str are consumed is not visible
+ * from this header.
+ * NOTE(review): the IP6_ICMP6_ERROR string is "sp-ip4-icmp6-error" while every
+ * other IP6_* entry uses an "sp-ip6-" prefix - this looks like a copy-paste
+ * slip. Confirm whether the string is a user-visible keyword before renaming
+ * it to "sp-ip6-icmp6-error". */
+#define foreach_sfdp_sp_node                                                  \
+  _ (IP4_REASS, "error-drop", "sp-ip4-reassembly")                            \
+  _ (IP6_REASS, "error-drop", "sp-ip6-reassembly")                            \
+  _ (IP4_UNKNOWN_PROTO, "error-drop", "sp-ip4-unknown-proto")                 \
+  _ (IP6_UNKNOWN_PROTO, "error-drop", "sp-ip6-unknown-proto")                 \
+  _ (IP4_ICMP4_ERROR, "error-drop", "sp-ip4-icmp4-error")                     \
+  _ (IP6_ICMP6_ERROR, "error-drop", "sp-ip4-icmp6-error")                     \
+  _ (IP4_TABLE_OVERFLOW, "error-drop", "sp-ip4-table-overflow")               \
+  _ (IP6_TABLE_OVERFLOW, "error-drop", "sp-ip6-table-overflow")
+
+enum
+{
+#define _(name, val, str) SFDP_SP_NODE_##name,
+  foreach_sfdp_sp_node
+#undef _
+    SFDP_N_SP_NODES
+};
+
+typedef union
+{
+  struct
+  {
+    union
+    {
+      u32 spi;
+      struct
+      {
+       u16 port_lo;
+       u16 port_hi;
+      };
+    };
+    u8 unused;
+    u8 proto;
+    u16 unused2;
+    u32 ip_addr_lo;
+    u32 ip_addr_hi;
+  };
+  u8x16u as_u8x16;
+  u32x4u as_u32x4;
+  u64x2u as_u64x2;
+} __clib_packed sfdp_ip4_key_t;
+STATIC_ASSERT_SIZEOF (sfdp_ip4_key_t, 16);
+
+typedef union
+{
+  struct
+  {
+    union
+    {
+      u32 spi;
+      struct
+      {
+       u16 port_lo;
+       u16 port_hi;
+      };
+    };
+    u16 unused;
+    u8 proto;
+    u8 unused2;
+    ip6_address_t ip6_addr_lo;
+    ip6_address_t ip6_addr_hi;
+  };
+  struct
+  {
+    u32x2u as_u32x2;
+    u32x8u as_u32x8;
+  };
+  struct
+  {
+    u16x4u as_u16x4;
+    u16x16u as_u16x16;
+  };
+  struct
+  {
+    u8x8u as_u8x8;
+    u8x16u as_u8x16[2];
+  };
+  struct
+  {
+    u64 as_u64;
+    u64x4u as_u64x4;
+  };
+} __clib_packed sfdp_ip6_key_t;
+STATIC_ASSERT_SIZEOF (sfdp_ip6_key_t, 40);
+
+typedef struct
+{
+  sfdp_ip4_key_t ip4_key;
+
+  union
+  {
+    struct
+    {
+      u32 context_id;
+      u8 zeros[4];
+    };
+    u64 as_u64;
+  };
+} __clib_packed sfdp_session_ip4_key_t;
+STATIC_ASSERT_SIZEOF (sfdp_session_ip4_key_t, 24);
+
+typedef struct
+{
+  sfdp_ip6_key_t ip6_key;
+
+  union
+  {
+    struct
+    {
+      u32 context_id;
+      u8 zeros[4];
+    };
+    u64 as_u64;
+  };
+} __clib_packed sfdp_session_ip6_key_t;
+STATIC_ASSERT_SIZEOF (sfdp_session_ip6_key_t, 48);
+
+typedef union
+{
+  sfdp_session_ip4_key_t key4;
+  sfdp_session_ip6_key_t key6;
+} sfdp_session_ip46_key_t;
+
+typedef union
+{
+  sfdp_ip4_key_t key4;
+  sfdp_ip6_key_t key6;
+} sfdp_ip46_key_t;
+
+typedef union
+{
+  clib_bihash_kv_24_8_t kv4;
+  clib_bihash_kv_48_8_t kv6;
+} sfdp_bihash_kv46_t;
+
+#define SFDP_SESSION_IP46_KEYS_TYPE(n)                                        \
+  union                                                                       \
+  {                                                                           \
+    sfdp_session_ip4_key_t keys4[(n)];                                        \
+    sfdp_session_ip6_key_t keys6[(n)];                                        \
+  }
+
+#define SFDP_UNBOUND_THREAD_INDEX ((u16) ~0)
+typedef struct sfdp_session
+{
+  CLIB_CACHE_LINE_ALIGN_MARK (cache0);
+  sfdp_bitmap_t bitmaps[SFDP_FLOW_F_B_N];
+  u64 session_id;
+  u64 expiry_opaque[2];
+  session_version_t session_version;
+  u8 state; /* see sfdp_session_state_t */
+  u8 proto;
+  u16 tenant_idx;
+  u16 owning_thread_index;
+  u8 unused0[16];
+  u8 pseudo_dir[SFDP_SESSION_N_KEY];
+  u8 type; /* see sfdp_session_type_t */
+  u8 key_flags;
+  u16 parser_index[SFDP_SESSION_N_KEY];
+  u8 scope_index;
+  u8 unused1[55];
+  CLIB_CACHE_LINE_ALIGN_MARK (cache1);
+  union
+  {
+    sfdp_session_ip46_key_t keys[SFDP_SESSION_N_KEY];
+    u8 keys_data[SFDP_SESSION_N_KEY][64];
+  };
+} sfdp_session_t; /* TODO: optimise mem layout, this is bad */
+#if CLIB_CACHE_LINE_BYTES == 64
+STATIC_ASSERT ((STRUCT_OFFSET_OF (sfdp_session_t, cache1) -
+               STRUCT_OFFSET_OF (sfdp_session_t, cache0)) ==
+                2 * CLIB_CACHE_LINE_BYTES,
+              "cache line alignment is broken for sfdp_session_t");
+#else
+STATIC_ASSERT ((STRUCT_OFFSET_OF (sfdp_session_t, cache1) -
+               STRUCT_OFFSET_OF (sfdp_session_t, cache0)) ==
+                CLIB_CACHE_LINE_BYTES,
+              "cache line alignment is broken for sfdp_session_t");
+#endif
+
+/* The members of the second cacheline are bigger than 64 bytes, thus due to
+ * the alignment constraints, the struct size depends on the cacheline size. */
+#if CLIB_CACHE_LINE_BYTES == 64
+STATIC_ASSERT_SIZEOF (sfdp_session_t, 4 * CLIB_CACHE_LINE_BYTES);
+#else
+STATIC_ASSERT_SIZEOF (sfdp_session_t, 2 * CLIB_CACHE_LINE_BYTES);
+#endif
+
+always_inline void *
+sfdp_get_session_expiry_opaque (sfdp_session_t *s)
+{
+  return (void *) s->expiry_opaque;
+}
+
+/* Per-thread state; sfdp_main_t.per_thread_data is a vector of these indexed
+ * by thread index (see sfdp_get_per_thread_data). */
+typedef struct
+{
+  u32 *expired_sessions; // per thread expired session vector
+  /* Session id counter: masked with sfdp_main_t.session_id_ctr_mask and OR-ed
+   * with session_id_template to build a session id, then advanced by 2 (see
+   * sfdp_session_generate_and_set_id). */
+  u64 session_id_ctr;
+  u64 session_id_template;
+  /* Vector of session indices cached by this thread: sfdp_free_session pushes
+   * to it and sfdp_alloc_session pops from it without taking the global
+   * session lock. */
+  u32 *session_freelist;
+  u32 n_sessions; /* Number of sessions belonging to this thread */
+} sfdp_per_thread_data_t;
+
+// TODO: Find a way to abstract, or share, timeout definition.
+//       They should be either private to timer.h, or sharable between them.
+
+/* Per-tenant timeout type */
+
+typedef struct sfdp_timeout
+{
+  const char *name; // Timeout name used to parse config and display
+  u32 val;         // Timeout value used when creating a new tenant
+} sfdp_timeout_t;
+
+STATIC_ASSERT_SIZEOF (sfdp_timeout_t[8], 16 * 8);
+
+/* Maximum number of tenant timers configurable */
+#define SFDP_MAX_TIMEOUTS 8
+
+typedef struct
+{
+  u32 tenant_id;
+  u32 context_id;
+  sfdp_bitmap_t bitmaps[SFDP_FLOW_F_B_N];
+  u32 timeouts[SFDP_MAX_TIMEOUTS];
+  u32 sp_node_indices[SFDP_N_SP_NODES];
+  uword icmp4_lookup_next;
+  uword icmp6_lookup_next;
+
+} sfdp_tenant_t;
+
+/* Global SFDP state (single instance: sfdp_main). */
+typedef struct
+{
+  /* key = (u64) tenant_id; val= (u64) tenant_idx; */
+  clib_bihash_8_8_t tenant_idx_by_id;
+
+  /* (sfdp_session_ip4_key_t) -> value built by sfdp_session_mk_table_value:
+   * session_version (16 MSB), 8 bits of padding, thread_index (8 bits),
+   * session_index (31 bits), stored_direction (1 LSB) */
+  clib_bihash_24_8_t table4;
+
+  /* (sfdp_session_ip6_key_t) -> value built by sfdp_session_mk_table_value:
+   * session_version (16 MSB), 8 bits of padding, thread_index (8 bits),
+   * session_index (31 bits), stored_direction (1 LSB) */
+  clib_bihash_48_8_t table6;
+  /* key = session_id; val = sfdp_session_mk_table_value output with the
+   * direction bit cleared (see sfdp_session_generate_and_set_id) */
+  clib_bihash_8_8_t session_index_by_id;
+  /* Taken around pool_get/pool_put on sessions and updates of free_sessions
+   * (see sfdp_alloc_session / sfdp_free_session) */
+  clib_spinlock_t session_lock;
+  sfdp_session_t *sessions; /* fixed pool */
+  /* Decremented when a session is taken from the pool, incremented when one
+   * is put back; allocation from the pool is refused when it is zero. */
+  u32 free_sessions;
+  vlib_combined_counter_main_t per_session_ctr[SFDP_FLOW_N_COUNTER];
+  u32 *frame_queue_index_per_scope;
+  uword *handoff_node_index_per_scope;
+  uword *ip4_lookup_node_index_per_scope;
+  uword *ip6_lookup_node_index_per_scope;
+  uword **parser_node_index_per_scope_per_original;
+  u32 icmp4_error_frame_queue_index;
+  u32 icmp6_error_frame_queue_index;
+  /* Mask applied to the per-thread session_id_ctr when building session ids */
+  u64 session_id_ctr_mask;
+  vlib_simple_counter_main_t tenant_session_ctr[SFDP_TENANT_SESSION_N_COUNTER];
+  vlib_combined_counter_main_t tenant_data_ctr[SFDP_TENANT_DATA_N_COUNTER];
+
+  /* pool of tenants */
+  sfdp_tenant_t *tenants;
+
+  /* per-thread data */
+  sfdp_per_thread_data_t *per_thread_data;
+  u16 msg_id_base;
+  sfdp_expiry_callbacks_t expiry_callbacks;
+
+  /* Timer names and defaults.
+   * Timers with name equal to NULL are not configured. */
+  sfdp_timeout_t timeouts[SFDP_MAX_TIMEOUTS];
+
+  /* log2 sizes, settable through the "sessions-log2",
+   * "sessions-per-thread-cache-log2" and "tenants-log2" startup options */
+  u32 log2_sessions;
+  u32 log2_sessions_cache_per_thread;
+  u32 log2_tenants;
+
+  /* Per-thread number of sessions margin before eviction.
+   * See the sfdp_set_eviction_sessions_margin function for more
+   * information. */
+  u32 eviction_sessions_margin;
+
+  /* If this is set, don't run polling nodes on main */
+  int no_main;
+} sfdp_main_t;
+
+typedef struct
+{
+  u32 scope_index;
+} sfdp_lookup_node_runtime_data_t;
+
+#define sfdp_foreach_timeout(sfdp, timeout)                                   \
+  for (timeout = (sfdp)->timeouts;                                            \
+       timeout < (sfdp)->timeouts + SFDP_MAX_TIMEOUTS; timeout++)
+
+#define sfdp_foreach_session(sfdp, i, s)                                      \
+  pool_foreach_index (i, (sfdp)->sessions)                                    \
+    if ((s = sfdp_session_at_index (i)) && s->state != SFDP_SESSION_STATE_FREE)
+
+extern sfdp_main_t sfdp_main;
+extern vlib_node_registration_t sfdp_handoff_node;
+extern vlib_node_registration_t sfdp_lookup_ip4_icmp_node;
+extern vlib_node_registration_t sfdp_lookup_ip6_icmp_node;
+extern vlib_node_registration_t sfdp_lookup_ip4_node;
+extern vlib_node_registration_t sfdp_lookup_ip6_node;
+format_function_t format_sfdp_session;
+format_function_t format_sfdp_ipv4_context_id;
+format_function_t format_sfdp_ipv4_ingress;
+format_function_t format_sfdp_ipv4_egress;
+format_function_t format_sfdp_ipv6_context_id;
+format_function_t format_sfdp_ipv6_ingress;
+format_function_t format_sfdp_ipv6_egress;
+format_function_t format_sfdp_session_detail;
+format_function_t format_sfdp_session_state;
+format_function_t format_sfdp_session_type;
+format_function_t format_sfdp_tenant;
+format_function_t format_sfdp_tenant_extra;
+format_function_t format_sfdp_sp_node;
+unformat_function_t unformat_sfdp_service;
+unformat_function_t unformat_sfdp_service_bitmap;
+unformat_function_t unformat_sfdp_sp_node;
+unformat_function_t unformat_sfdp_timeout_name;
+
+static_always_inline u64
+sfdp_num_sessions ()
+{
+  return (1ULL << (sfdp_main.log2_sessions));
+}
+
+static_always_inline u64
+sfdp_num_sessions_cache_per_thread ()
+{
+  return (1ULL << (sfdp_main.log2_sessions_cache_per_thread));
+}
+
+static_always_inline int
+sfdp_table_is_full ()
+{
+  /* Note: We use >= to be on the safe side... */
+  return pool_elts (sfdp_main.sessions) >= sfdp_num_sessions ();
+}
+
+/* Number of sessions taken from the global pool minus those that are merely
+ * parked in the per-thread freelists. */
+static_always_inline u64
+sfdp_real_active_sessions ()
+{
+  sfdp_per_thread_data_t *ptd;
+  u64 n_cached = 0;
+
+  vec_foreach (ptd, sfdp_main.per_thread_data)
+    n_cached += vec_len (ptd->session_freelist);
+
+  return (u64) pool_elts (sfdp_main.sessions) - n_cached;
+}
+
+// Number of sessions that can be allocated by threads in the global pool
+static_always_inline u64
+sfdp_remaining_sessions_in_pool ()
+{
+  return sfdp_num_sessions () - pool_elts (sfdp_main.sessions);
+}
+
+// Return the number of sessions that this thread should be able to allocate
+static_always_inline u64
+sfdp_sessions_available_for_this_thread (sfdp_per_thread_data_t *ptd)
+{
+  return sfdp_remaining_sessions_in_pool () + vec_len (ptd->session_freelist);
+}
+
+static_always_inline u64
+sfdp_session_num_thread_factor ()
+{
+  u32 n_workers = vlib_num_workers ();
+  return n_workers ? n_workers : 1;
+}
+
+/* Bucket count derived from log2_sessions: half the configured number of
+ * sessions, with a floor of one bucket (presumably used to size the IPv4
+ * session bihash - the caller is outside this header).
+ * The floor avoids the u32 wrap-around of `log2_sessions - 1`, and the
+ * resulting out-of-range shift, when log2_sessions is configured as 0. */
+static_always_inline u64
+sfdp_ip4_num_buckets ()
+{
+  u32 log2_sessions = sfdp_main.log2_sessions;
+  return 1ULL << (log2_sessions ? log2_sessions - 1 : 0);
+}
+
+static_always_inline u64
+sfdp_ip4_mem_size ()
+{
+  return (1ULL << (sfdp_main.log2_sessions + SFDP_LOG2_MEM_PER_SESSION));
+}
+
+/* Bucket count derived from log2_sessions: half the configured number of
+ * sessions, with a floor of one bucket (presumably used to size the IPv6
+ * session bihash - the caller is outside this header).
+ * The floor avoids the u32 wrap-around of `log2_sessions - 1`, and the
+ * resulting out-of-range shift, when log2_sessions is configured as 0. */
+static_always_inline u64
+sfdp_ip6_num_buckets ()
+{
+  u32 log2_sessions = sfdp_main.log2_sessions;
+  return 1ULL << (log2_sessions ? log2_sessions - 1 : 0);
+}
+
+static_always_inline u64
+sfdp_ip6_mem_size ()
+{
+  return (1ULL << (sfdp_main.log2_sessions + SFDP_LOG2_MEM_PER_SESSION));
+}
+
+/* Bucket count derived from log2_tenants: a quarter of the configured number
+ * of tenants, with a floor of one bucket (presumably used to size the tenant
+ * bihash - the caller is outside this header).
+ * The floor avoids the u32 wrap-around of `log2_tenants - 2`, and the
+ * resulting out-of-range shift, when log2_tenants is configured below 2. */
+static_always_inline u64
+sfdp_tenant_num_buckets ()
+{
+  u32 log2_tenants = sfdp_main.log2_tenants;
+  return 1ULL << (log2_tenants > 2 ? log2_tenants - 2 : 0);
+}
+
+static_always_inline u64
+sfdp_tenant_mem_size ()
+{
+  return (1ULL << (sfdp_main.log2_tenants + SFDP_LOG2_MEM_PER_TENANT));
+}
+
+static_always_inline sfdp_per_thread_data_t *
+sfdp_get_per_thread_data (u32 thread_index)
+{
+  return vec_elt_at_index (sfdp_main.per_thread_data, thread_index);
+}
+
+/* Extract the session index from a lookup value: the low 32 bits hold the
+ * pseudo flow index, whose bit 0 is dropped. */
+static_always_inline u32
+sfdp_session_index_from_lookup (u64 val)
+{
+  u32 pseudo_flow_index = (u32) val;
+  return pseudo_flow_index >> 1;
+}
+
+/* Extract the 8-bit thread index stored in bits 39..32 of a lookup value. */
+static_always_inline u8
+sfdp_thread_index_from_lookup (u64 val)
+{
+  return (u8) (val >> 32);
+}
+
+static_always_inline u16
+sfdp_session_version_from_lookup (u64 val)
+{
+  return (val >> 48);
+}
+
+static_always_inline u32
+sfdp_packet_dir_from_lookup (u64 val)
+{
+  return val & 0x1;
+}
+
+static_always_inline u32
+sfdp_pseudo_flow_index_from_lookup (u64 val)
+{
+  return val & (~(u32) 0);
+}
+
+/** Build a lookup value. From most to least significant bits it holds:
+ *  1. 16 bits of session version
+ *  (8 bits of padding, left at zero)
+ *  2. 8 bits of thread index
+ *  3. 32 bits of pseudo flow index
+ **/
+static_always_inline u64
+sfdp_session_mk_table_value (u8 thread_index, u32 pseudo_flow_index,
+                             session_version_t session_version)
+{
+  u64 version_bits = ((u64) session_version) << 48;
+  u64 thread_bits = ((u64) thread_index) << 32;
+
+  return version_bits | thread_bits | (u64) pseudo_flow_index;
+}
+
+static_always_inline sfdp_session_t *
+sfdp_session_at_index (u32 idx)
+{
+  return pool_elt_at_index (sfdp_main.sessions, idx);
+}
+
+static_always_inline sfdp_session_t *
+sfdp_session_at_index_no_check (u32 idx)
+{
+  return sfdp_main.sessions + idx;
+}
+
+static_always_inline int
+sfdp_session_at_index_is_active (u32 idx)
+{
+  // TODO: We could use SFDP_SESSION_STATE_FREE alone maybe if its value was
+  // zero.
+  sfdp_main_t *sfdp = &sfdp_main;
+  return (!pool_is_free_index (sfdp->sessions, idx)) &&
+        (sfdp->sessions[idx].state != SFDP_SESSION_STATE_FREE);
+}
+
+static_always_inline sfdp_session_t *
+sfdp_session_at_index_if_valid (u32 idx)
+{
+  return sfdp_session_at_index_is_active (idx) ? sfdp_session_at_index (idx) :
+                                                NULL;
+}
+
+/* Build a flow index: the session index shifted left by one, with the low
+ * bit set for any direction other than SFDP_FLOW_FORWARD. */
+static_always_inline u32
+sfdp_mk_flow_index (u32 session_index, u8 dir)
+{
+  u32 not_forward = (dir != SFDP_FLOW_FORWARD);
+  return (session_index << 1) | not_forward;
+}
+
+static_always_inline u32
+sfdp_session_from_flow_index (u32 flow_index)
+{
+  return flow_index >> 1;
+}
+
+static_always_inline u32
+sfdp_direction_from_flow_index (u32 flow_index)
+{
+  return (flow_index & 0x1);
+}
+
+static_always_inline sfdp_tenant_t *
+sfdp_tenant_at_index (sfdp_main_t *sfdpm, u32 idx)
+{
+  return pool_elt_at_index (sfdpm->tenants, idx);
+}
+
+/* Number of keys of a session: 2 when a secondary IPv4 or IPv6 key is flagged
+ * valid in key_flags, 1 otherwise.
+ * NOTE(review): SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER is not taken into
+ * account here - confirm this is intended for user-defined keys. */
+static_always_inline u8
+sfdp_session_n_keys (sfdp_session_t *session)
+{
+  const u8 secondary_ip_mask = SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4 |
+                               SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6;
+
+  return (session->key_flags & secondary_ip_mask) ? 2 : 1;
+}
+
+static_always_inline void
+sfdp_notify_new_sessions (sfdp_main_t *sfdpm, u32 *new_sessions, u32 len)
+{
+  sfdpm->expiry_callbacks.notify_new_sessions (new_sessions, len);
+  SFDP_CALLBACKS_CALL (notify_new_sessions, new_sessions, len);
+}
+
+/* Invoke the notify_deleted_sessions callbacks through SFDP_CALLBACKS_CALL;
+ * deleted_sessions and len are forwarded unchanged.
+ * NOTE(review): unlike sfdp_notify_new_sessions, sfdpm is unused and no
+ * expiry callback is invoked here - confirm this asymmetry is intended. */
+static_always_inline void
+sfdp_notify_deleted_sessions (sfdp_main_t *sfdpm, u32 *deleted_sessions,
+                             u32 len)
+{
+  SFDP_CALLBACKS_CALL (notify_deleted_sessions, deleted_sessions, len);
+}
+
+/* Allocate a session index.
+ * A thread-bound request is first served from the calling thread's freelist
+ * (ptd->session_freelist). Otherwise, or when that freelist is empty, a
+ * session is taken from the global pool under session_lock, provided
+ * sfdp->free_sessions is non-zero.
+ * Returns the session index, or ~0 when nothing could be allocated.
+ * ptd->n_sessions is incremented only for successful thread-bound
+ * allocations; ptd is not dereferenced when bound_to_thread is false. */
+static_always_inline u32
+sfdp_alloc_session (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                    bool bound_to_thread)
+{
+  sfdp_session_t *new_session;
+  u32 session_index = ~0;
+  u32 n_cached = bound_to_thread ? vec_len (ptd->session_freelist) : 0;
+
+  if (n_cached)
+    {
+      session_index = vec_pop (ptd->session_freelist);
+    }
+  else
+    {
+      clib_spinlock_lock_if_init (&sfdp->session_lock);
+      if (sfdp->free_sessions)
+        {
+          pool_get (sfdp->sessions, new_session);
+          sfdp->free_sessions -= 1;
+          session_index = new_session - sfdp->sessions;
+        }
+      clib_spinlock_unlock_if_init (&sfdp->session_lock);
+    }
+
+  if (bound_to_thread && session_index != ~0)
+    ptd->n_sessions += 1;
+
+  return session_index;
+}
+
+/* Release a session index.
+ * When ptd is given and its freelist holds fewer entries than
+ * sfdp_num_sessions_cache_per_thread (), the index is parked in that
+ * freelist; otherwise it is returned to the global pool under session_lock
+ * and free_sessions is incremented.
+ * ptd->n_sessions is decremented whenever ptd is non-NULL. */
+static_always_inline void
+sfdp_free_session (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                   u32 session_index)
+{
+  int keep_in_thread_cache =
+    ptd &&
+    vec_len (ptd->session_freelist) < sfdp_num_sessions_cache_per_thread ();
+
+  if (!keep_in_thread_cache)
+    {
+      clib_spinlock_lock_if_init (&sfdp->session_lock);
+      pool_put_index (sfdp->sessions, session_index);
+      sfdp->free_sessions += 1;
+      clib_spinlock_unlock_if_init (&sfdp->session_lock);
+    }
+  else
+    {
+      vec_add1 (ptd->session_freelist, session_index);
+    }
+
+  if (ptd)
+    ptd->n_sessions -= 1;
+}
+
+/* Generate a session id for `session`, store it in session->session_id and
+ * register it in sfdp->session_index_by_id.
+ * The id is the per-thread counter masked with session_id_ctr_mask, OR-ed
+ * with the per-thread template. The value stored in the table is built by
+ * sfdp_session_mk_table_value from the owning thread, the session index
+ * (direction bit cleared) and the current session version.
+ * NOTE(review): the return value of clib_bihash_add_del_8_8 is ignored. */
+static_always_inline void
+sfdp_session_generate_and_set_id (sfdp_main_t *sfdp,
+                                 sfdp_per_thread_data_t *ptd,
+                                 sfdp_session_t *session)
+{
+  clib_bihash_kv_8_8_t kv2;
+  u64 value;
+  u32 session_idx = session - sfdp->sessions;
+  u32 pseudo_flow_idx = (session_idx << 1);
+  /* Narrowed to u8 when passed to sfdp_session_mk_table_value below */
+  u32 thread_index = session->owning_thread_index;
+  u64 session_id = (ptd->session_id_ctr & (sfdp->session_id_ctr_mask)) |
+                  ptd->session_id_template;
+  ptd->session_id_ctr +=
+    2; /* two at a time, because last bit is reserved for direction */
+  session->session_id = session_id;
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_idx,
+                                      session->session_version);
+  kv2.key = session_id;
+  kv2.value = value;
+  clib_bihash_add_del_8_8 (&sfdp->session_index_by_id, &kv2, 1);
+}
+
+/* Internal function to create a new session.
+ * sfdp_notify_new_sessions must be called afterward. If thread_index is
+ * SFDP_UNBOUND_THREAD_INDEX, the session is created with no assigned thread
+ * (it is then allocated from the global pool, never from a per-thread
+ * freelist).
+ *
+ * k points to a sfdp_session_ip6_key_t when is_ipv6 is set, and to a
+ * sfdp_session_ip4_key_t otherwise. On entry, bit 0 of lookup_val[0] is used
+ * as the stored direction of the primary key; on success lookup_val[0] is
+ * XOR-ed with the value inserted in the lookup table. The time_now and h
+ * arguments are not used by this function.
+ *
+ * Return value: 0 --> SUCCESS
+ *               1 --> Unable to allocate session
+ *               2 --> Collision */
+static_always_inline int
+sfdp_create_session_inline (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                           sfdp_tenant_t *tenant, u16 tenant_idx,
+                           u16 thread_index, f64 time_now, void *k, u64 *h,
+                           u64 *lookup_val, u32 scope_index, int is_ipv6)
+{
+  sfdp_bihash_kv46_t kv = {};
+  u64 value;
+  u8 proto;
+  sfdp_session_t *session;
+  u32 session_idx;
+  u32 pseudo_flow_idx;
+
+  session_idx =
+    sfdp_alloc_session (sfdp, ptd, thread_index != SFDP_UNBOUND_THREAD_INDEX);
+
+  if (session_idx == ~0)
+    return 1;
+
+  session = pool_elt_at_index (sfdp->sessions, session_idx);
+
+  pseudo_flow_idx = (lookup_val[0] & 0x1) | (session_idx << 1);
+  /* The table value carries session_version + 1 because the version itself
+   * is only bumped further down, once the table insertion has succeeded.
+   * NOTE(review): thread_index is a u16 here but the helper takes a u8, so
+   * SFDP_UNBOUND_THREAD_INDEX is stored as 0xff - confirm this is intended. */
+  value = sfdp_session_mk_table_value (thread_index, pseudo_flow_idx,
+                                      session->session_version + 1);
+  if (is_ipv6)
+    {
+      clib_memcpy_fast (&kv.kv6.key, k, sizeof (kv.kv6.key));
+      kv.kv6.value = value;
+      proto = ((sfdp_session_ip6_key_t *) k)->ip6_key.proto;
+      /* is_add == 2: presumably "add without overwriting an existing entry"
+       * per the bihash template; a non-zero return is treated as a
+       * collision. */
+      if (clib_bihash_add_del_48_8 (&sfdp->table6, &kv.kv6, 2))
+       {
+         /* collision - remote thread created same entry */
+         /* NOTE(review): sfdp_free_session decrements ptd->n_sessions
+          * whenever ptd is non-NULL, while sfdp_alloc_session only
+          * increments it for thread-bound sessions - confirm callers pass
+          * ptd == NULL for unbound sessions. */
+         sfdp_free_session (sfdp, ptd, session_idx);
+         return 2;
+       }
+      session->type = SFDP_SESSION_TYPE_IP6;
+      session->key_flags = SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6;
+    }
+  else
+    {
+      clib_memcpy_fast (&kv.kv4.key, k, sizeof (kv.kv4.key));
+      kv.kv4.value = value;
+      proto = ((sfdp_session_ip4_key_t *) k)->ip4_key.proto;
+      /* Same insertion semantics as the IPv6 branch above */
+      if (clib_bihash_add_del_24_8 (&sfdp->table4, &kv.kv4, 2))
+       {
+         /* collision - remote thread created same entry */
+         sfdp_free_session (sfdp, ptd, session_idx);
+         return 2;
+       }
+      session->type = SFDP_SESSION_TYPE_IP4;
+      session->key_flags = SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4;
+    }
+  // TODO: Would be nice to do this upon free instead, to avoid having to
+  //       check if the session is valid at all when checking invalidation.
+  session->session_version += 1;
+  session->tenant_idx = tenant_idx;
+  session->state = SFDP_SESSION_STATE_FSOL;
+  session->owning_thread_index = thread_index;
+  session->scope_index = scope_index;
+  /* A session id is only generated when per-thread data is available */
+  if (ptd)
+    sfdp_session_generate_and_set_id (sfdp, ptd, session);
+
+  /* The new session inherits the tenant's bitmaps */
+  clib_memcpy_fast (session->bitmaps, tenant->bitmaps,
+                   sizeof (session->bitmaps));
+  if (is_ipv6)
+    clib_memcpy_fast (&session->keys[SFDP_SESSION_KEY_PRIMARY].key6, k,
+                     sizeof (session->keys[0].key6));
+  else
+    clib_memcpy_fast (&session->keys[SFDP_SESSION_KEY_PRIMARY].key4, k,
+                     sizeof (session->keys[0].key4));
+  session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY] = lookup_val[0] & 0x1;
+  session->proto = proto;
+
+  /* Bit 0 of value equals bit 0 of lookup_val[0], so the XOR clears it.
+   * NOTE(review): the counter calls below receive the full u64; this
+   * presumably relies on the index being narrowed to the 32-bit flow index
+   * by the counter API - confirm. */
+  lookup_val[0] ^= value;
+  /* Bidirectional counter zeroing */
+  vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+                             lookup_val[0]);
+  vlib_zero_combined_counter (&sfdp->per_session_ctr[SFDP_FLOW_COUNTER_LOOKUP],
+                             lookup_val[0] | 0x1);
+  vlib_increment_simple_counter (
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_CREATED],
+    thread_index, tenant_idx, 1);
+  return 0;
+}
+int sfdp_create_session (vlib_main_t *vm, vlib_buffer_t *b, u32 context_id,
+                        u32 thread_index, u32 tenant_index,
+                        u32 *session_index, int is_ipv6);
+int sfdp_create_session_with_scope_index (vlib_main_t *vm, vlib_buffer_t *b,
+                                         u32 context_id, u32 thread_index,
+                                         u32 tenant_index, u32 *session_index,
+                                         u32 scope_index, int is_ipv6);
+
+clib_error_t *sfdp_tenant_add_del (sfdp_main_t *sfdp, u32 tenant_id,
+                                  u32 context_id, u8 is_del);
+clib_error_t *sfdp_set_services (sfdp_main_t *sfdp, u32 tenant_id,
+                                sfdp_bitmap_t bitmap, u8 direction);
+clib_error_t *sfdp_set_timeout (sfdp_main_t *sfdp, u32 tenant_id,
+                               u32 timeout_idx, u32 timeout_val);
+
+clib_error_t *sfdp_set_sp_node (sfdp_main_t *sfdp, u32 tenant_id, u32 sp_index,
+                               u32 node_index);
+clib_error_t *sfdp_set_icmp_error_node (sfdp_main_t *sfdp, u32 tenant_id,
+                                       u8 is_ip6, u32 node_index);
+void sfdp_normalise_ip4_key (sfdp_session_t *session,
+                            sfdp_session_ip4_key_t *result, u8 key_idx);
+
+void sfdp_normalise_ip6_key (sfdp_session_t *session,
+                            sfdp_session_ip6_key_t *result, u8 key_idx);
+
+void sfdp_table_format_add_header_col (table_t *t);
+u32 sfdp_table_format_insert_session (table_t *t, u32 n, u32 session_index,
+                                     sfdp_session_t *session, u32 tenant_id,
+                                     f64 now);
+int sfdp_bihash_add_del_inline_with_hash_24_8 (clib_bihash_24_8_t *h,
+                                              clib_bihash_kv_24_8_t *kv,
+                                              u64 hash, u8 is_add);
+
+int sfdp_bihash_add_del_inline_with_hash_48_8 (clib_bihash_48_8_t *h,
+                                              clib_bihash_kv_48_8_t *kv,
+                                              u64 hash, u8 is_add);
+
+void sfdp_ip4_full_reass_custom_context_register_next_node (u16 node_index);
+void sfdp_ip6_full_reass_custom_context_register_next_node (u16 node_index);
+void
+sfdp_ip4_full_reass_custom_context_register_next_err_node (u16 node_index);
+void
+sfdp_ip6_full_reass_custom_context_register_next_err_node (u16 node_index);
+
+#define SFDP_CORE_PLUGIN_BUILD_VER "1.0"
+
+#endif /* __included_sfdp_h__ */
diff --git a/src/vnet/sfdp/sfdp_funcs.h b/src/vnet/sfdp/sfdp_funcs.h
new file mode 100644 (file)
index 0000000..b3e9525
--- /dev/null
@@ -0,0 +1,308 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_funcs_h__
+#define __included_sfdp_funcs_h__
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/lookup/sfdp_bihashes.h>
+
+/* Tear down a session: delete every lookup key it owns from the bihash
+ * tables, delete its session-id entry, account for the removal and hand the
+ * session back through sfdp_free_session().
+ *
+ * sfdp:          main structure holding table4, table6, session_index_by_id
+ *                and the tenant session counters.
+ * ptd:           per-thread data passed through to sfdp_free_session().
+ * session:       the session being removed.
+ * thread_index:  thread slot used for the REMOVED counter increment.
+ * session_index: index of `session`, passed to sfdp_free_session().
+ */
+static_always_inline void
+sfdp_session_remove (sfdp_main_t *sfdp, sfdp_per_thread_data_t *ptd,
+                    sfdp_session_t *session, u32 thread_index,
+                    u32 session_index)
+{
+  /* Both kv structures are zero-initialised; only the key part is filled in
+   * below since every bihash call here is a delete (is_add == 0). */
+  clib_bihash_kv_8_8_t kv2 = { 0 };
+  sfdp_bihash_kv46_t kv = { 0 };
+  /* Scratch buffer for parser-defined ("user") keys. */
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kvdata[SFDP_PARSER_MAX_KEY_SIZE + 8];
+  uword parser_key_size;
+  void *parser_table;
+  sfdp_parser_data_t *parser;
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+
+  kv2.key = session->session_id;
+  /* key_flags records which key slots are valid and of which kind; each
+   * valid key is deleted from the table matching its kind. */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    {
+      clib_memcpy_fast (&kv.kv4.key,
+                       &session->keys[SFDP_SESSION_KEY_PRIMARY].key4,
+                       sizeof (kv.kv4.key));
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv.kv4, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4)
+    {
+      clib_memcpy_fast (&kv.kv4.key,
+                       &session->keys[SFDP_SESSION_KEY_SECONDARY].key4,
+                       sizeof (kv.kv4.key));
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv.kv4, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    {
+      clib_memcpy_fast (&kv.kv6.key,
+                       &session->keys[SFDP_SESSION_KEY_PRIMARY].key6,
+                       sizeof (kv.kv6.key));
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv.kv6, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6)
+    {
+      clib_memcpy_fast (&kv.kv6.key,
+                       &session->keys[SFDP_SESSION_KEY_SECONDARY].key6,
+                       sizeof (kv.kv6.key));
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv.kv6, 0);
+    }
+  /* User keys live in a per-parser table: look the parser up through the
+   * index stored in the session and copy key_size bytes of key data. */
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata, &session->keys_data[SFDP_SESSION_KEY_PRIMARY],
+                       parser_key_size);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                 parser_table, kvdata, 0);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_SECONDARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata,
+                       &session->keys_data[SFDP_SESSION_KEY_SECONDARY],
+                       parser_key_size);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                 parser_table, kvdata, 0);
+    }
+  /* Drop the session-id -> index mapping and count the removal against the
+   * session's tenant. */
+  clib_bihash_add_del_8_8 (&sfdp->session_index_by_id, &kv2, 0);
+  vlib_increment_simple_counter (
+    &sfdp->tenant_session_ctr[SFDP_TENANT_SESSION_COUNTER_REMOVED],
+    thread_index, session->tenant_idx, 1);
+  /* Mark the slot free and unowned before releasing it. */
+  session->state = SFDP_SESSION_STATE_FREE;
+  session->owning_thread_index = SFDP_UNBOUND_THREAD_INDEX;
+  sfdp_free_session (sfdp, ptd, session_index);
+}
+
+/* Try to install `key` as the secondary IPv4/IPv6 lookup key of the session
+ * that `pseudo_flow_index` belongs to.
+ *
+ * The key is inserted in table4 or table6 (chosen by `type`) with is_add == 2
+ * and a value built from thread_index, pseudo_flow_index and the session
+ * version. The computed bucket hash is stored in *h. Only when the insertion
+ * returns 0 is the session updated: the key is stored in the secondary slot,
+ * the low bit of pseudo_flow_index is recorded as its pseudo direction, and
+ * the matching SECONDARY_VALID flag is set.
+ *
+ * Returns the value returned by the bihash insertion.
+ */
+static_always_inline int
+sfdp_session_try_add_secondary_key (sfdp_main_t *sfdp, u32 thread_index,
+                                   u32 pseudo_flow_index,
+                                   sfdp_session_ip46_key_t *key,
+                                   ip46_type_t type, u64 *h)
+{
+  u32 si = sfdp_session_from_flow_index (pseudo_flow_index);
+  sfdp_session_t *s = sfdp_session_at_index (si);
+  u64 table_value = sfdp_session_mk_table_value (
+    thread_index, pseudo_flow_index, s->session_version);
+  const int is_ip4 = (type == IP46_TYPE_IP4);
+  sfdp_bihash_kv46_t kv;
+  int rv;
+
+  if (is_ip4)
+    {
+      kv.kv4.key[0] = key->key4.ip4_key.as_u64x2[0];
+      kv.kv4.key[1] = key->key4.ip4_key.as_u64x2[1];
+      kv.kv4.key[2] = key->key4.as_u64;
+      kv.kv4.value = table_value;
+      *h = clib_bihash_hash_24_8 (&kv.kv4);
+      rv = sfdp_bihash_add_del_inline_with_hash_24_8 (&sfdp->table4, &kv.kv4,
+                                                      *h, 2);
+    }
+  else
+    {
+      kv.kv6.key[0] = key->key6.ip6_key.as_u64;
+      kv.kv6.key[1] = key->key6.ip6_key.as_u64x4[0];
+      kv.kv6.key[2] = key->key6.ip6_key.as_u64x4[1];
+      kv.kv6.key[3] = key->key6.ip6_key.as_u64x4[2];
+      kv.kv6.key[4] = key->key6.ip6_key.as_u64x4[3];
+      kv.kv6.key[5] = key->key6.as_u64;
+      kv.kv6.value = table_value;
+      *h = clib_bihash_hash_48_8 (&kv.kv6);
+      rv = sfdp_bihash_add_del_inline_with_hash_48_8 (&sfdp->table6, &kv.kv6,
+                                                      *h, 2);
+    }
+
+  /* Record the key in the session only once the table accepted it. */
+  if (rv == 0)
+    {
+      s->keys[SFDP_SESSION_KEY_SECONDARY] = *key;
+      s->pseudo_dir[SFDP_SESSION_KEY_SECONDARY] = pseudo_flow_index & 0x1;
+      s->key_flags |= is_ip4 ? SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4 :
+                               SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6;
+    }
+
+  return rv;
+}
+
+/* Try to install a parser-defined ("user") secondary key for the session
+ * that `pseudo_flow_index` belongs to.
+ *
+ * table / key_size / parser_index describe the parser owning the key;
+ * `key` points to key_size bytes of key material. The key followed by the
+ * 8-byte table value is assembled in a local buffer, its hash is stored in
+ * *h, and the pair is inserted with is_add == 2. When the insertion returns
+ * 0 the key bytes, pseudo direction, SECONDARY_VALID_USER flag and parser
+ * index are recorded in the session.
+ *
+ * Returns the value returned by the bihash insertion.
+ */
+static_always_inline int
+sfdp_parser_session_try_add_secondary_key_with_details (
+  void *table, uword key_size, uword parser_index, u32 thread_index,
+  u32 pseudo_flow_index, void *key, u64 *h)
+{
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kvdata[SFDP_PARSER_MAX_KEY_SIZE + 8];
+  /* Minimal stand-in handed to SFDP_PARSER_BIHASH_CALL_FN in place of a
+   * parser; it only carries the key size. */
+  const struct
+  {
+    uword key_size;
+  } key_desc = { .key_size = key_size };
+  sfdp_session_t *s =
+    sfdp_session_at_index (sfdp_session_from_flow_index (pseudo_flow_index));
+  u64 table_value = sfdp_session_mk_table_value (
+    thread_index, pseudo_flow_index, s->session_version);
+  int rv;
+
+  /* Layout expected by the parser bihash: key bytes, then the value. */
+  clib_memcpy_fast (kvdata, key, key_size);
+  clib_memcpy_fast (kvdata + key_size, &table_value, sizeof (table_value));
+
+  *h =
+    SFDP_PARSER_BIHASH_CALL_FN (&key_desc, sfdp_parser_bihash_hash_fn, kvdata);
+  rv = SFDP_PARSER_BIHASH_CALL_FN (&key_desc, sfdp_parser_bihash_add_del_fn,
+                                   table, kvdata, 2);
+  if (rv != 0)
+    return rv;
+
+  clib_memcpy_fast (s->keys_data[SFDP_SESSION_KEY_SECONDARY], kvdata,
+                    key_size);
+  s->pseudo_dir[SFDP_SESSION_KEY_SECONDARY] = pseudo_flow_index & 0x1;
+  s->key_flags |= SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER;
+  s->parser_index[SFDP_SESSION_KEY_SECONDARY] = parser_index;
+  return 0;
+}
+
+/* Restore the ordering of an IPv4 key after its fields were modified: when
+ * the "hi" address compares lower than the "lo" address (in host byte
+ * order), the address/port pairs are swapped in place.
+ *
+ * Returns old_pseudo unchanged when no swap was needed, or old_pseudo with
+ * its low bit flipped when the pairs were swapped.
+ */
+static_always_inline u8
+sfdp_renormalise_ip4_key (sfdp_session_ip4_key_t *key, u32 old_pseudo)
+{
+  u32 saved_addr;
+  u16 saved_port;
+
+  /* Already ordered: nothing to do. */
+  if (clib_net_to_host_u32 (key->ip4_key.ip_addr_hi) >=
+      clib_net_to_host_u32 (key->ip4_key.ip_addr_lo))
+    return old_pseudo;
+
+  saved_addr = key->ip4_key.ip_addr_hi;
+  saved_port = key->ip4_key.port_hi;
+  key->ip4_key.ip_addr_hi = key->ip4_key.ip_addr_lo;
+  key->ip4_key.port_hi = key->ip4_key.port_lo;
+  key->ip4_key.ip_addr_lo = saved_addr;
+  key->ip4_key.port_lo = saved_port;
+
+  return old_pseudo ^ 0x1;
+}
+
+/* Write every valid lookup key of a session into its bihash table with a
+ * value that encodes `thread_index`.
+ *
+ * For each key slot flagged valid in session->key_flags, the key is added
+ * (is_add == 1) to table4, table6 or the owning parser's table. The value is
+ * built by sfdp_session_mk_table_value() from thread_index, the flow index
+ * (session_index << 1, OR-ed with the slot's pseudo direction) and the
+ * session version. session->owning_thread_index is not touched here.
+ */
+static_always_inline void
+sfdp_session_bind_keys_to_thread (sfdp_session_t *session, u32 session_index,
+                                 u16 thread_index)
+{
+  clib_bihash_kv_24_8_t kv4;
+  clib_bihash_kv_48_8_t kv6;
+  /* Scratch buffer for parser-defined keys: key bytes followed by value. */
+  __clib_aligned (CLIB_CACHE_LINE_BYTES)
+  u8 kvdata[SFDP_PARSER_MAX_KEY_SIZE + 8];
+  uword parser_key_size;
+  void *parser_table;
+  sfdp_parser_data_t *parser;
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_parser_main_t *pm = &sfdp_parser_main;
+  /* Flow index with the direction bit cleared; the bit is filled in per key
+   * from pseudo_dir[]. */
+  u32 fi = session_index << 1;
+
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP4)
+    {
+      clib_memcpy_fast (kv4.key, &session->keys[SFDP_SESSION_KEY_PRIMARY].key4,
+                       sizeof (kv4.key));
+      kv4.value = sfdp_session_mk_table_value (
+       thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY],
+       session->session_version);
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv4, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_IP6)
+    {
+      clib_memcpy_fast (kv6.key, &session->keys[SFDP_SESSION_KEY_PRIMARY].key6,
+                       sizeof (kv6.key));
+      kv6.value = sfdp_session_mk_table_value (
+       thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY],
+       session->session_version);
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv6, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_PRIMARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_PRIMARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata, &session->keys_data[SFDP_SESSION_KEY_PRIMARY],
+                       parser_key_size);
+      /* The value is stored immediately after the key bytes. */
+      ((u64u *) (kvdata + parser_key_size))[0] = sfdp_session_mk_table_value (
+       thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_PRIMARY],
+       session->session_version);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                 parser_table, kvdata, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP4)
+    {
+      clib_memcpy_fast (kv4.key,
+                       &session->keys[SFDP_SESSION_KEY_SECONDARY].key4,
+                       sizeof (kv4.key));
+      kv4.value = sfdp_session_mk_table_value (
+       thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY],
+       session->session_version);
+      clib_bihash_add_del_24_8 (&sfdp->table4, &kv4, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_IP6)
+    {
+      clib_memcpy_fast (kv6.key,
+                       &session->keys[SFDP_SESSION_KEY_SECONDARY].key6,
+                       sizeof (kv6.key));
+      kv6.value = sfdp_session_mk_table_value (
+       thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY],
+       session->session_version);
+      clib_bihash_add_del_48_8 (&sfdp->table6, &kv6, 1);
+    }
+  if (session->key_flags & SFDP_SESSION_KEY_FLAG_SECONDARY_VALID_USER)
+    {
+      parser = vec_elt_at_index (
+       pm->parsers, session->parser_index[SFDP_SESSION_KEY_SECONDARY]);
+      parser_key_size = parser->key_size;
+      parser_table = parser->bihash_table;
+      clib_memcpy_fast (kvdata,
+                       &session->keys_data[SFDP_SESSION_KEY_SECONDARY],
+                       parser_key_size);
+      /* The value is stored immediately after the key bytes. */
+      ((u64u *) (kvdata + parser_key_size))[0] = sfdp_session_mk_table_value (
+       thread_index, fi | session->pseudo_dir[SFDP_SESSION_KEY_SECONDARY],
+       session->session_version);
+      SFDP_PARSER_BIHASH_CALL_FN (parser, sfdp_parser_bihash_add_del_fn,
+                                 parser_table, kvdata, 1);
+    }
+}
+
+/* Claim an unbound session for a thread.
+ *
+ * session_index: index of the session to bind.
+ * thread_index:  in:  the thread that wants to own the session (asserted to
+ *                     be the calling thread once the claim succeeded);
+ *                out: when the claim fails, the thread index that was found
+ *                     in session->owning_thread_index.
+ * new_session:   when non-zero, sfdp_notify_new_sessions() and
+ *                sfdp_session_generate_and_set_id() are also called for the
+ *                session after a successful claim.
+ *
+ * Ownership is taken with a strong compare-and-swap of owning_thread_index
+ * from SFDP_UNBOUND_THREAD_INDEX to *thread_index. On success the session
+ * keys are rewritten with sfdp_session_bind_keys_to_thread().
+ *
+ * Returns 0 when the session is now bound to *thread_index, -1 when it was
+ * already bound.
+ */
+static_always_inline int
+sfdp_session_bind_to_thread (u32 session_index, u16 *thread_index,
+                            u8 new_session)
+{
+  sfdp_session_t *session = sfdp_session_at_index (session_index);
+  u16 expected = SFDP_UNBOUND_THREAD_INDEX;
+  sfdp_main_t *sfdp = &sfdp_main;
+  sfdp_per_thread_data_t *ptd =
+    vec_elt_at_index (sfdp->per_thread_data, *thread_index);
+
+  /* The compare-and-swap follows __atomic_compare_exchange_n semantics: it
+   * evaluates to non-zero when the swap happened, and on failure it stores
+   * the value actually found into `expected`. The failure path is therefore
+   * the one where the result is zero. */
+  if (!clib_atomic_cmp_and_swap_acq_relax_n (&session->owning_thread_index,
+                                            &expected, *thread_index, 0))
+    {
+      *thread_index = expected; /* Return the actual thread index */
+      return -1; /* The session was already bound to another thread */
+    }
+
+  ASSERT (*thread_index == vlib_get_thread_index ());
+
+  sfdp_session_bind_keys_to_thread (session, session_index, *thread_index);
+  if (new_session)
+    {
+      sfdp_notify_new_sessions (sfdp, &session_index, 1);
+      sfdp_session_generate_and_set_id (sfdp, ptd, session);
+    }
+  return 0;
+}
+#endif
diff --git a/src/vnet/sfdp/sfdp_types.api b/src/vnet/sfdp/sfdp_types.api
new file mode 100644 (file)
index 0000000..f884acb
--- /dev/null
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+option version = "0.0.1";
+import "vnet/ip/ip_types.api";
+
+/* Flow direction as carried on the API. sfdp_api_direction() in
+ * sfdp_types_funcs.h converts it to the internal direction value. */
+enum sfdp_session_direction : u8
+{
+  SFDP_API_FORWARD = 0,
+  SFDP_API_REVERSE = 1,
+};
+
+/* Session states exposed on the API (one byte on the wire). */
+enum sfdp_session_state : u8
+{
+  SFDP_API_SESSION_STATE_FSOL = 0,
+  SFDP_API_SESSION_STATE_ESTABLISHED = 1,
+  SFDP_API_SESSION_STATE_TIME_WAIT = 2,
+};
+
+/* "sp" node selectors as carried on the API. sfdp_api_sp_node() in
+ * sfdp_types_funcs.h maps each value to the matching SFDP_SP_NODE_*
+ * constant and maps anything else to 0. */
+enum sfdp_sp_node : u8
+{
+  SFDP_API_SP_NODE_IP4_REASS = 0,
+  SFDP_API_SP_NODE_IP6_REASS = 1,
+  SFDP_API_SP_NODE_IP4_UNKNOWN_PROTO = 2,
+  SFDP_API_SP_NODE_IP6_UNKNOWN_PROTO = 3,
+  SFDP_API_SP_NODE_IP4_ICMP4_ERROR = 4,
+  SFDP_API_SP_NODE_IP6_ICMP6_ERROR = 5,
+};
+
+/* Session type as carried on the API. Only IP4 is defined here;
+ * sfdp_session_type_encode() returns -1 for any other internal type. */
+enum sfdp_session_type : u8
+{
+  SFDP_API_SESSION_TYPE_IP4 = 0,
+};
+
+/* Service name carried as a fixed-size, 32-byte string field. */
+typedef sfdp_service_name
+{
+  string data[32];
+};
+
+/* Session key as reported on the API. sfdp_session_ip46_key_encode() in
+ * sfdp_types_funcs.h fills it from an internal key: the key's "lo"
+ * address/port go to init_addr/init_port and its "hi" address/port go to
+ * resp_addr/resp_port. */
+typedef sfdp_session_key
+{
+  u32 context_id;
+  vl_api_address_t init_addr;
+  u16 init_port;
+  vl_api_address_t resp_addr;
+  u16 resp_port;
+};
\ No newline at end of file
diff --git a/src/vnet/sfdp/sfdp_types_funcs.h b/src/vnet/sfdp/sfdp_types_funcs.h
new file mode 100644 (file)
index 0000000..956020a
--- /dev/null
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_types_funcs_h__
+#define __included_sfdp_types_funcs_h__
+
+#include <vnet/sfdp/sfdp.h>
+#include <vnet/sfdp/sfdp_types.api_types.h>
+#include <vnet/sfdp/sfdp_types.api_enum.h>
+#include <vnet/ip/ip_types_api.h>
+/* Convert an API session direction into the internal flow direction.
+ *
+ * dir: direction received on the API.
+ *
+ * Returns SFDP_FLOW_FORWARD or SFDP_FLOW_REVERSE; any value outside the API
+ * enum falls back to SFDP_FLOW_FORWARD.
+ */
+static_always_inline u8
+sfdp_api_direction (vl_api_sfdp_session_direction_t dir)
+{
+  switch (dir)
+    {
+    case SFDP_API_FORWARD:
+      return SFDP_FLOW_FORWARD;
+    case SFDP_API_REVERSE:
+      /* Return the internal constant, not the API enumerator. */
+      return SFDP_FLOW_REVERSE;
+    }
+  return SFDP_FLOW_FORWARD;
+}
+
+/* Convert an internal session type into its API representation.
+ *
+ * Returns SFDP_API_SESSION_TYPE_IP4 for SFDP_SESSION_TYPE_IP4 and -1
+ * (converted to the API enum type) for every other value.
+ */
+static_always_inline vl_api_sfdp_session_type_t
+sfdp_session_type_encode (sfdp_session_type_t x)
+{
+  switch (x)
+    {
+    case SFDP_SESSION_TYPE_IP4:
+      return SFDP_API_SESSION_TYPE_IP4;
+    default:
+      return -1;
+    }
+}
+
+/* Translate an API "sp" node selector into the internal SFDP_SP_NODE_*
+ * value. Selectors without a mapping translate to 0.
+ */
+static_always_inline u8
+sfdp_api_sp_node (vl_api_sfdp_sp_node_t sp_node)
+{
+  u8 node = 0;
+
+  switch (sp_node)
+    {
+    case SFDP_API_SP_NODE_IP4_REASS:
+      node = SFDP_SP_NODE_IP4_REASS;
+      break;
+    case SFDP_API_SP_NODE_IP6_REASS:
+      node = SFDP_SP_NODE_IP6_REASS;
+      break;
+    case SFDP_API_SP_NODE_IP4_UNKNOWN_PROTO:
+      node = SFDP_SP_NODE_IP4_UNKNOWN_PROTO;
+      break;
+    case SFDP_API_SP_NODE_IP6_UNKNOWN_PROTO:
+      node = SFDP_SP_NODE_IP6_UNKNOWN_PROTO;
+      break;
+    case SFDP_API_SP_NODE_IP4_ICMP4_ERROR:
+      node = SFDP_SP_NODE_IP4_ICMP4_ERROR;
+      break;
+    case SFDP_API_SP_NODE_IP6_ICMP6_ERROR:
+      node = SFDP_SP_NODE_IP6_ICMP6_ERROR;
+      break;
+    default:
+      break;
+    }
+
+  return node;
+}
+
+/* Encode an internal IPv4/IPv6 session key into its API representation.
+ *
+ * skey: internal key; key4 is read when type is IP46_TYPE_IP4, key6
+ *       otherwise.
+ * type: address family of the key.
+ * out:  API key to fill. The "lo" address/port become init_addr/init_port
+ *       and the "hi" address/port become resp_addr/resp_port; context id
+ *       and ports are passed through clib_host_to_net_*.
+ */
+static_always_inline void
+sfdp_session_ip46_key_encode (sfdp_session_ip46_key_t *skey, ip46_type_t type,
+                             vl_api_sfdp_session_key_t *out)
+{
+  ip46_address_t lo_addr, hi_addr;
+  u32 context_id;
+  u16 lo_port, hi_port;
+
+  /* Pull the family-specific fields out first, then encode them once. */
+  if (type == IP46_TYPE_IP4)
+    {
+      context_id = skey->key4.context_id;
+      lo_addr.ip4.as_u32 = skey->key4.ip4_key.ip_addr_lo;
+      hi_addr.ip4.as_u32 = skey->key4.ip4_key.ip_addr_hi;
+      lo_port = skey->key4.ip4_key.port_lo;
+      hi_port = skey->key4.ip4_key.port_hi;
+    }
+  else
+    {
+      context_id = skey->key6.context_id;
+      lo_addr.ip6 = skey->key6.ip6_key.ip6_addr_lo;
+      hi_addr.ip6 = skey->key6.ip6_key.ip6_addr_hi;
+      lo_port = skey->key6.ip6_key.port_lo;
+      hi_port = skey->key6.ip6_key.port_hi;
+    }
+
+  out->context_id = clib_host_to_net_u32 (context_id);
+  out->init_port = clib_host_to_net_u16 (lo_port);
+  out->resp_port = clib_host_to_net_u16 (hi_port);
+  ip_address_encode (&lo_addr, type, &out->init_addr);
+  ip_address_encode (&hi_addr, type, &out->resp_addr);
+}
+
+#endif /*__included_sfdp_types_funcs_h__*/
\ No newline at end of file
diff --git a/src/vnet/sfdp/timer/timer.c b/src/vnet/sfdp/timer/timer.c
new file mode 100644 (file)
index 0000000..649dbe8
--- /dev/null
@@ -0,0 +1,161 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#include <vnet/sfdp/timer/timer.h>
+
+#include <vlib/vlib.h>
+
+#include <vnet/sfdp/expiry/expiry.h>
+#include <vnet/sfdp/sfdp.h>
+
+sfdp_timer_main_t sfdp_timer_main;
+
+/* Timer wheel expiry callback (registered through sfdp_tw_init()).
+ * Each entry of `expired` is masked with SFDP_TIMER_SI_MASK to obtain a
+ * session index, which is appended to the calling thread's
+ * expired_sessions vector for later processing. */
+static void
+expired_timer_callback (u32 *expired)
+{
+  sfdp_timer_per_thread_data_t *ptd = vec_elt_at_index (
+    sfdp_timer_main.per_thread_data, vlib_get_thread_index ());
+
+  for (u32 i = 0; i < vec_len (expired); i++)
+    {
+      u32 si = expired[i] & SFDP_TIMER_SI_MASK;
+      vec_add1 (ptd->expired_sessions, si);
+    }
+}
+
+/* "enable" expiry callback: make sure there is one per-thread entry per
+ * vlib main, then reset each entry's expired_sessions vector pointer and
+ * initialise its timer wheel with expired_timer_callback and a tick of
+ * SFDP_TIMER_INTERVAL. */
+static void
+timer_expiry_cb_enable ()
+{
+  vlib_thread_main_t *tm = vlib_get_thread_main ();
+  sfdp_timer_main_t *tmain = &sfdp_timer_main;
+
+  vec_validate (tmain->per_thread_data, tm->n_vlib_mains - 1);
+
+  for (uword i = 0; i < vec_len (tmain->per_thread_data); i++)
+    {
+      sfdp_timer_per_thread_data_t *ptd = tmain->per_thread_data + i;
+
+      ptd->expired_sessions = 0;
+      sfdp_tw_init (&ptd->wheel, expired_timer_callback, SFDP_TIMER_INTERVAL,
+                    ~0);
+    }
+}
+
+/* "disable" expiry callback. Intentionally a no-op: nothing allocated by
+ * timer_expiry_cb_enable() is released here. */
+static void
+timer_expiry_cb_disable ()
+{
+  // Cleanup timer wheel ? Disabling not supported for now.
+}
+
+/* "expire_or_evict_sessions" expiry callback.
+ *
+ * Advances the calling thread's timer wheel to the current time, then walks
+ * the session indices reported as expired by the wheel. A session whose
+ * recorded next_expiration is still more than one tick beyond the next
+ * timer interval gets its timer re-armed for the remaining ticks; any other
+ * session is appended to expired_sessions_vec.
+ *
+ * desired_expiries:     ignored for now.
+ * expired_sessions_vec: vector to append to.
+ *
+ * Returns the (possibly reallocated) expired_sessions_vec.
+ */
+static u32 *
+timer_expiry_cb_expire_or_evict_sessions (u32 desired_expiries,
+                                         u32 *expired_sessions_vec)
+{
+  (void) desired_expiries; // TODO: Early discards not supported for now.
+
+  vlib_main_t *vm = vlib_get_main ();
+  sfdp_timer_per_thread_data_t *ptd = vec_elt_at_index (
+    sfdp_timer_main.per_thread_data, vlib_get_thread_index ());
+  u32 si;
+
+  ptd->current_time = vlib_time_now (vm);
+  sfdp_expire_timers (&ptd->wheel, ptd->current_time);
+
+  sfdp_session_index_iterate_expired (ptd, si)
+  {
+    sfdp_session_t *session = sfdp_session_at_index (si);
+    sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+    f64 ticks_left =
+      (timer->next_expiration - (ptd->current_time + SFDP_TIMER_INTERVAL)) /
+      SFDP_TIMER_INTERVAL;
+
+    if (!(ticks_left > (f64) 1.))
+      {
+        /* Nothing meaningful left to wait for: report as expired. */
+        vec_add1 (expired_sessions_vec, si);
+        continue;
+      }
+
+    /* The expiration was pushed back since the timer was armed: re-arm. */
+    sfdp_session_timer_start (&ptd->wheel, timer, si, ptd->current_time,
+                              ticks_left);
+  }
+
+  return expired_sessions_vec;
+}
+
+/* "notify_new_sessions" expiry callback: start a timer on the calling
+ * thread's wheel for each of the `len` session indices in `new_sessions`,
+ * using the owning tenant's SFDP_TIMEOUT_EMBRYONIC timeout. Also refreshes
+ * the per-thread current_time. */
+static void
+timer_expiry_cb_notify_new_sessions (const u32 *new_sessions, u32 len)
+{
+  sfdp_main_t *sfdp = &sfdp_main;
+  vlib_main_t *vm = vlib_get_main ();
+  sfdp_timer_per_thread_data_t *ptd = vec_elt_at_index (
+    sfdp_timer_main.per_thread_data, vlib_get_thread_index ());
+  f64 time_now = vlib_time_now (vm);
+
+  ptd->current_time = time_now;
+
+  // Start session timer in embryonic mode
+  for (u32 i = 0; i < len; i++)
+    {
+      u32 si = new_sessions[i];
+      sfdp_session_t *session = sfdp_session_at_index (si);
+      sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+      sfdp_tenant_t *tenant = sfdp_tenant_at_index (sfdp, session->tenant_idx);
+
+      sfdp_session_timer_start (&ptd->wheel, timer, si, time_now,
+                                tenant->timeouts[SFDP_TIMEOUT_EMBRYONIC]);
+    }
+}
+
+/* "session_remaining_time" expiry callback: difference between the
+ * session's recorded next_expiration and `now`. */
+static f64
+timer_expiry_cb_session_remaining_time (sfdp_session_t *session, f64 now)
+{
+  sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+
+  return timer->next_expiration - now;
+}
+
+/* "format_session_details" expiry callback (format() style).
+ * Variadic arguments: sfdp_session_t *session, f64 now.
+ * Appends the time left before the session's next_expiration to `s`. */
+static u8 *
+timer_expiry_cb_format_session_details (u8 *s, va_list *args)
+{
+  sfdp_session_t *session = va_arg (*args, sfdp_session_t *);
+  f64 now = va_arg (*args, f64);
+  sfdp_session_timer_t *timer = SFDP_SESSION_TIMER (session);
+
+  return format (s, "expires after: %fs\n", timer->next_expiration - now);
+}
+
+/* Register the timer-wheel implementation as the SFDP expiry module.
+ *
+ * The timeouts listed in foreach_sfdp_timeout (name and default value) are
+ * handed to sfdp_init_timeouts(); when that call returns non-zero its
+ * result is returned as is. Otherwise the timer callbacks are installed
+ * with sfdp_set_expiry_callbacks() and its result is returned.
+ */
+u32
+sfdp_timer_register_as_expiry_module ()
+{
+  sfdp_timeout_t timeouts[SFDP_MAX_TIMEOUTS] = {};
+  u32 n_timeouts = 0;
+  int ret;
+
+  /* Expand the timeout list into consecutive entries of timeouts[]. */
+#define _(n, v, str)                                                          \
+  timeouts[n_timeouts].name = str;                                            \
+  timeouts[n_timeouts].val = v;                                               \
+  n_timeouts++;
+  foreach_sfdp_timeout
+#undef _
+
+  ret = sfdp_init_timeouts (timeouts, n_timeouts);
+  if (ret)
+    return ret;
+
+  sfdp_expiry_callbacks_t cbs = {
+    .enable = timer_expiry_cb_enable,
+    .disable = timer_expiry_cb_disable,
+    .expire_or_evict_sessions = timer_expiry_cb_expire_or_evict_sessions,
+    .notify_new_sessions = timer_expiry_cb_notify_new_sessions,
+    .session_remaining_time = timer_expiry_cb_session_remaining_time,
+    .format_session_details = timer_expiry_cb_format_session_details
+  };
+  return sfdp_set_expiry_callbacks (&cbs);
+}
diff --git a/src/vnet/sfdp/timer/timer.h b/src/vnet/sfdp/timer/timer.h
new file mode 100644 (file)
index 0000000..48ef288
--- /dev/null
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: Apache-2.0
+ * Copyright (c) 2025 Cisco Systems, Inc.
+ */
+
+#ifndef __included_sfdp_timer_h__
+#define __included_sfdp_timer_h__
+#include <vppinfra/tw_timer_2t_1w_2048sl.h>
+#include <vppinfra/vec.h>
+
+#include <vnet/sfdp/sfdp.h>
+
+/* Timer wheel flavour used by the SFDP timer module. */
+typedef tw_timer_wheel_2t_1w_2048sl_t sfdp_tw_t;
+
+/* Timer state kept per thread. */
+typedef struct
+{
+  sfdp_tw_t wheel;      /* this thread's timer wheel */
+  f64 current_time;     /* time sampled by the last expiry/notify callback */
+  u32 *expired_sessions; /* vector of session indices filled by the wheel's
+                            expiry callback and drained (then reset) by
+                            sfdp_session_index_iterate_expired() */
+} sfdp_timer_per_thread_data_t;
+
+/* Global state of the SFDP timer module. */
+typedef struct
+{
+  /* Vector indexed by thread index. */
+  sfdp_timer_per_thread_data_t *per_thread_data;
+} sfdp_timer_main_t;
+
+/* Single instance, defined in timer.c. */
+extern sfdp_timer_main_t sfdp_timer_main;
+
+// Per session state held in sfdp session expiry opaque data
+// (accessed through SFDP_SESSION_TIMER(); the size is checked against the
+// opaque area by SFDP_EXPIRY_STATIC_ASSERT_FITS_IN_EXPIRY_OPAQUE below).
+// NOTE(review): `__unused` is a reserved identifier and is defined as a
+// macro by some system headers -- consider renaming this padding field.
+typedef struct
+{
+  f64 next_expiration; /* absolute time at which the session should expire */
+  u32 handle;	       /* timer wheel handle of the running timer */
+  u32 __unused;
+} __attribute__ ((may_alias)) sfdp_session_timer_t;
+
+/* List of timeouts known to the timer module, as
+ * _ (enum suffix, default value, name string). The default values are used
+ * as tick counts for the timer wheel, whose tick is SFDP_TIMER_INTERVAL. */
+#define foreach_sfdp_timeout                                                  \
+  _ (EMBRYONIC, 5, "embryonic")                                               \
+  _ (ESTABLISHED, 120, "established")                                         \
+  _ (TCP_ESTABLISHED, 3600, "tcp-established")                                \
+  _ (SECURITY, 30, "security")
+
+/* One SFDP_TIMEOUT_<name> enumerator per list entry, in list order;
+ * SFDP_N_TIMEOUT is the number of entries. */
+typedef enum
+{
+#define _(name, val, str) SFDP_TIMEOUT_##name,
+  foreach_sfdp_timeout
+#undef _
+    SFDP_N_TIMEOUT
+} sfdp_timeout_type_t;
+
+/* View of a session's expiry opaque data as a sfdp_session_timer_t. */
+#define SFDP_SESSION_TIMER(session)                                           \
+  SFDP_EXPIRY_SESSION (session, sfdp_session_timer_t)
+
+/* Compile-time check that the timer state fits in the opaque area. */
+SFDP_EXPIRY_STATIC_ASSERT_FITS_IN_EXPIRY_OPAQUE (sfdp_session_timer_t);
+
+/* Aliases for the timer wheel flavour selected by sfdp_tw_t. */
+#define sfdp_timer_start_internal  tw_timer_start_2t_1w_2048sl
+#define sfdp_timer_stop_internal   tw_timer_stop_2t_1w_2048sl
+#define sfdp_timer_update_internal tw_timer_update_2t_1w_2048sl
+#define sfdp_expire_timers        tw_timer_expire_timers_2t_1w_2048sl
+/* Mask applied to an expired timer handle to extract the session index. */
+#define SFDP_TIMER_SI_MASK        (0x7fffffff)
+#define SFDP_TIMER_INTERVAL       ((f64) 1.0) /*in seconds*/
+/* Seconds <-> wheel ticks conversions. These must be function-like macros:
+ * no whitespace is allowed between the macro name and its parameter list,
+ * otherwise the parameter list becomes part of the replacement text. */
+#define SFDP_SECONDS_TO_TICKS(seconds) ((seconds) / SFDP_TIMER_INTERVAL)
+#define SFDP_TICKS_TO_SECONDS(ticks)   ((ticks) * SFDP_TIMER_INTERVAL)
+
+/* Return the timer per-thread data entry for `thread_index`. */
+static_always_inline sfdp_timer_per_thread_data_t *
+sfdp_timer_get_per_thread_data (u32 thread_index)
+{
+  sfdp_timer_main_t *tmain = &sfdp_timer_main;
+
+  return vec_elt_at_index (tmain->per_thread_data, thread_index);
+}
+
+/* Initialise a timer wheel; thin wrapper forwarding all arguments to
+ * tw_timer_wheel_init_2t_1w_2048sl().
+ *
+ * tw:                     wheel to initialise.
+ * expired_timer_callback: function invoked with the expired handles.
+ * timer_interval:         duration of one tick, in seconds.
+ * max_expirations:        passed through unchanged.
+ */
+static_always_inline void
+sfdp_tw_init (sfdp_tw_t *tw, void *expired_timer_callback, f64 timer_interval,
+             u32 max_expirations)
+{
+  tw_timer_wheel_init_2t_1w_2048sl (tw, expired_timer_callback, timer_interval,
+                                   max_expirations);
+}
+
+/* Use timer mechanism for expiry.
+ * This must be called while sfdp is not running yet.
+ * Will return 0 on success, -1 otherwise.
+ * NOTE(review): the return type is u32, so a -1 coming from the underlying
+ * calls reaches the caller as ~0; test the result against 0 rather than
+ * with "< 0". */
+u32 sfdp_timer_register_as_expiry_module ();
+
+/* Arm a session timer.
+ *
+ * tw:            wheel the timer is started on.
+ * timer:         per-session timer state; receives the expiration time
+ *                (now + ticks * SFDP_TIMER_INTERVAL) and the new handle.
+ * session_index: value stored in the wheel for this timer.
+ * now:           current time, in seconds.
+ * ticks:         number of wheel ticks before expiry.
+ */
+static_always_inline void
+sfdp_session_timer_start (sfdp_tw_t *tw, sfdp_session_timer_t *timer,
+                         u32 session_index, f64 now, u32 ticks)
+{
+  f64 expires_at = now + ticks * SFDP_TIMER_INTERVAL;
+
+  timer->next_expiration = expires_at;
+  timer->handle = sfdp_timer_start_internal (tw, session_index, 0, ticks);
+}
+
+/* Stop the wheel timer identified by timer->handle. The fields of `timer`
+ * are left unchanged. */
+static_always_inline void
+sfdp_session_timer_stop (sfdp_tw_t *tw, sfdp_session_timer_t *timer)
+{
+  sfdp_timer_stop_internal (tw, timer->handle);
+}
+
+/* Record a new expiration time (now + ticks * SFDP_TIMER_INTERVAL) in the
+ * session timer state only. The wheel is not touched (`tw` is unused); the
+ * timer expiry processing in timer.c compares next_expiration with the
+ * current time and re-arms the wheel timer when the expiration has moved
+ * further into the future. */
+static_always_inline void
+sfdp_session_timer_update (sfdp_tw_t *tw, sfdp_session_timer_t *timer, f64 now,
+                          u32 ticks)
+{
+  timer->next_expiration = now + ticks * SFDP_TIMER_INTERVAL;
+}
+
+/* Set a new expiration time that may be earlier than the one currently
+ * recorded. When the new expiration (now + ticks * SFDP_TIMER_INTERVAL) is
+ * sooner than timer->next_expiration, the wheel timer itself is updated so
+ * that it fires in time; in every case the new expiration is recorded. */
+static_always_inline void
+sfdp_session_timer_update_maybe_past (sfdp_tw_t *tw,
+                                     sfdp_session_timer_t *timer, f64 now,
+                                     u32 ticks)
+{
+  f64 new_expiration = now + (ticks * SFDP_TIMER_INTERVAL);
+
+  if (timer->next_expiration > new_expiration)
+    {
+      sfdp_timer_update_internal (tw, timer->handle, ticks);
+    }
+
+  timer->next_expiration = new_expiration;
+}
+
+/* Same contract as sfdp_session_timer_update_maybe_past(), with the branch
+ * that updates the wheel timer hinted as unlikely. */
+static_always_inline void
+sfdp_session_timer_update_unlikely_past (sfdp_tw_t *tw,
+                                        sfdp_session_timer_t *timer, f64 now,
+                                        u32 ticks)
+{
+  f64 new_expiration = now + (ticks * SFDP_TIMER_INTERVAL);
+
+  if (PREDICT_FALSE (timer->next_expiration > new_expiration))
+    sfdp_timer_update_internal (tw, timer->handle, ticks);
+
+  timer->next_expiration = new_expiration;
+}
+
+/* Reset the length of vector `v` and return 0. Returning a value lets the
+ * reset be used inside an expression, as done in the loop condition of
+ * sfdp_session_index_iterate_expired(). */
+static_always_inline uword
+vec_reset_len_return (u32 *v)
+{
+  vec_reset_length (v);
+  return 0;
+}
+
+/* Iterate over the session indices stored in (ptd)->expired_sessions,
+ * assigning each one to `s` before the loop body runs.
+ *   - "|| 1" keeps the loop going when the session index itself is 0.
+ *   - When the end of the vector is reached, the vector length is reset
+ *     through vec_reset_len_return(), which returns 0 and ends the loop.
+ *     Leaving the loop early with break/return skips that reset.
+ * `ptd` is parenthesised at every use so any pointer expression can be
+ * passed safely. */
+#define sfdp_session_index_iterate_expired(ptd, s)                            \
+  for (u32 *s_ptr = (ptd)->expired_sessions;                                  \
+       ((s_ptr < vec_end ((ptd)->expired_sessions)) &&                        \
+       (((s) = s_ptr[0]) || 1)) ||                                           \
+       vec_reset_len_return ((ptd)->expired_sessions);                        \
+       s_ptr++)
+
+#endif /* __included_sfdp_timer_h__ */