ioam: manycast using iOAM and SR (VPP-628) 46/4746/16
authorShwetha Bhandari <shwethab@cisco.com>
Wed, 18 Jan 2017 07:13:54 +0000 (12:43 +0530)
committerDamjan Marion <dmarion.lists@gmail.com>
Mon, 6 Mar 2017 20:00:14 +0000 (20:00 +0000)
Change-Id: I6d2afda991d771fb4a89fc3f6544f8e940a9b9f0
Signed-off-by: Shwetha Bhandari <shwethab@cisco.com>
18 files changed:
src/plugins/ioam.am
src/plugins/ioam/analyse/ioam_analyse.h
src/plugins/ioam/analyse/ip6/ip6_ioam_analyse.h
src/plugins/ioam/encap/ip6_ioam_trace.c
src/plugins/ioam/encap/ip6_ioam_trace.h
src/plugins/ioam/ip6/ioam_cache.api [new file with mode: 0644]
src/plugins/ioam/ip6/ioam_cache.c [new file with mode: 0644]
src/plugins/ioam/ip6/ioam_cache.h [new file with mode: 0644]
src/plugins/ioam/ip6/ioam_cache_all_api_h.h [new file with mode: 0644]
src/plugins/ioam/ip6/ioam_cache_msg_enum.h [new file with mode: 0644]
src/plugins/ioam/ip6/ioam_cache_node.c [new file with mode: 0644]
src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c [new file with mode: 0644]
src/plugins/ioam/lib-trace/trace_util.h
src/vnet/fib/fib_entry.h
src/vnet/fib/fib_entry_src.c
src/vnet/ip/ip6.h
src/vnet/ip/ip6_forward.c
src/vnet/sr/sr.h

index 14d8a9e..4346e3c 100644 (file)
@@ -75,7 +75,8 @@ IOAM_TRACE_NOINST_HDR =                 \
   ioam/lib-trace/trace_all_api_h.h     \
   ioam/lib-trace/trace_msg_enum.h      \
   ioam/lib-trace/trace.api.h           \
-  ioam/lib-trace/trace_util.h
+  ioam/lib-trace/trace_util.h          \
+  ioam/encap/ip6_ioam_trace.h
 
 IOAM_TRACE_API = ioam/lib-trace/trace.api
 
@@ -163,6 +164,23 @@ IOAM_ANALYSE_SRC =                                 \
        ioam/analyse/ioam_analyse.h             \
        ioam/analyse/ioam_summary_export.h
 
+########################################
+# iOAM record cache and rewrite
+########################################
+
+IOAM_IP6_MANYCAST_SRC =        \
+ioam/ip6/ioam_cache.c                     \
+ioam/ip6/ioam_cache_node.c                \
+ioam/ip6/ioam_cache_tunnel_select_node.c  \
+ioam/ip6/ioam_cache.api.h
+
+IOAM_IP6_MANYCAST_API = ioam/ip6/ioam_cache.api
+
+IOAM_IP6_MANYCAST_NOINST_HDR =      \
+  ioam/ip6/ioam_cache_all_api_h.h   \
+  ioam/ip6/ioam_cache_msg_enum.h    \
+  ioam/ip6/ioam_cache.api.h
+
 ########################################
 # iOAM plugins
 ########################################
@@ -174,20 +192,23 @@ ioam_plugin_la_SOURCES =          \
         $(IOAM_VXLAN_GPE_SRC)          \
         $(IOAM_E2E_SRC)                        \
        $(IPFIX_COLLECTOR_SRC)          \
-       $(IOAM_ANALYSE_SRC)
+       $(IOAM_ANALYSE_SRC)             \
+       $(IOAM_IP6_MANYCAST_SRC)
 
 API_FILES +=                           \
         $(IOAM_POT_API)                 \
         $(IOAM_EXPORT_API)              \
         $(IOAM_TRACE_API)               \
-        $(IOAM_VXLAN_GPE_API)
+        $(IOAM_VXLAN_GPE_API)          \
+        $(IOAM_IP6_MANYCAST_API)
 
 noinst_HEADERS +=                               \
         $(IOAM_POT_NOINST_HDR)                  \
         $(IOAM_EXPORT_NOINST_HDR)               \
         $(IOAM_TRACE_NOINST_HDR)                \
         $(IOAM_VXLAN_GPE_NOINST_HDR)            \
-        $(IOAM_E2E_NOINST_HDR)
+        $(IOAM_E2E_NOINST_HDR)                 \
+        $(IOAM_IP6_MANYCAST_NOINST_HDR)
 
 vppplugins_LTLIBRARIES += ioam_plugin.la
 
index d5b6fbe..3c69d71 100644 (file)
@@ -123,12 +123,14 @@ typedef struct ioam_analyser_data_t_
 } ioam_analyser_data_t;
 
 always_inline f64
-ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len)
+ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len,
+                            u8 oneway)
 {
   u16 size_of_traceopt_per_node, size_of_all_traceopts;
   u8 num_nodes;
-  u32 *start_elt, *end_elt;
+  u32 *start_elt, *end_elt, *uturn_elt;;
   u32 start_time, end_time;
+  u8 done = 0;
 
   size_of_traceopt_per_node = fetch_trace_data_size (trace->ioam_trace_type);
   // Unknown trace type
@@ -145,6 +147,19 @@ ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len)
     trace->elts +
     (u32) (size_of_traceopt_per_node * (num_nodes - 1) / sizeof (u32));
 
+  if (oneway && (trace->ioam_trace_type & BIT_TTL_NODEID))
+    {
+      done = 0;
+      do
+       {
+         uturn_elt = start_elt - size_of_traceopt_per_node / sizeof (u32);
+
+         if ((clib_net_to_host_u32 (*start_elt) >> 24) <=
+             (clib_net_to_host_u32 (*uturn_elt) >> 24))
+           done = 1;
+       }
+      while (!done && (start_elt = uturn_elt) != end_elt);
+    }
   if (trace->ioam_trace_type & BIT_TTL_NODEID)
     {
       start_elt++;
@@ -155,7 +170,6 @@ ip6_ioam_analyse_calc_delay (ioam_trace_hdr_t * trace, u16 trace_len)
       start_elt++;
       end_elt++;
     }
-
   start_time = clib_net_to_host_u32 (*start_elt);
   end_time = clib_net_to_host_u32 (*end_elt);
 
@@ -273,11 +287,10 @@ ip6_ioam_analyse_hbh_trace (ioam_analyser_data_t * data,
 found_match:
   trace_record->pkt_counter++;
   trace_record->bytes_counter += pak_len;
-
   if (trace->ioam_trace_type & BIT_TIMESTAMP)
     {
       /* Calculate time delay */
-      u32 delay = (u32) ip6_ioam_analyse_calc_delay (trace, trace_len);
+      u32 delay = (u32) ip6_ioam_analyse_calc_delay (trace, trace_len, 0);
       if (delay < trace_record->min_delay)
        trace_record->min_delay = delay;
       else if (delay > trace_record->max_delay)
index f6abdce..5a2a2d7 100644 (file)
@@ -18,6 +18,7 @@
 
 #include <ioam/analyse/ioam_analyse.h>
 #include <vnet/ip/ip6_hop_by_hop.h>
+#include <ioam/encap/ip6_ioam_trace.h>
 
 /** @brief IP6-iOAM analyser main structure.
     @note cache aligned.
@@ -57,6 +58,64 @@ ioam_analyse_get_data_from_flow_id (u32 flow_id)
   return (ioam_analyser_main.aggregated_data + flow_id);
 }
 
+always_inline void *
+ip6_ioam_find_hbh_option (ip6_hop_by_hop_header_t * hbh0, u8 option)
+{
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  u8 type0;
+
+  opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+  limit0 =
+    (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + ((hbh0->length + 1) << 3));
+
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      if (type0 == option)
+	return ((void *) opt0);
+
+      if (0 == type0)
+	{
+	  opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);	/* Pad1 = 1 octet */
+	  continue;
+	}
+      opt0 = (ip6_hop_by_hop_option_t *)
+	(((u8 *) opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
+    }
+
+  return NULL;
+}
+
+always_inline int
+ip6_ioam_analyse_compare_path_delay (ip6_hop_by_hop_header_t * hbh0,
+				     ip6_hop_by_hop_header_t * hbh1,
+				     bool oneway)
+{
+  ioam_trace_option_t *trace0 = NULL, *trace1 = NULL;
+  f64 delay0, delay1;
+
+  trace0 =
+    ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+  trace1 =
+    ip6_ioam_find_hbh_option (hbh1, HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST);
+
+  if (PREDICT_FALSE ((trace0 == NULL) && (trace1 == NULL)))
+    return 0;
+
+  if (PREDICT_FALSE (trace1 == NULL))
+    return 1;
+
+  if (PREDICT_FALSE (trace0 == NULL))
+    return -1;
+
+  delay0 = ip6_ioam_analyse_calc_delay (&trace0->trace_hdr,
+					trace0->hdr.length - 2, oneway);
+  delay1 = ip6_ioam_analyse_calc_delay (&trace1->trace_hdr,
+					trace1->hdr.length - 2, oneway);
+
+  return ((delay0 > delay1) - (delay0 < delay1));	/* sign only: f64->int truncates sub-unit deltas to 0 */
+}
+
 #endif /* PLUGINS_IOAM_PLUGIN_IOAM_ANALYSE_IP6_IOAM_ANALYSE_NODE_H_ */
 
 /*
index 6972ba4..f1eb1bf 100644 (file)
@@ -40,7 +40,6 @@ typedef union
   u32 as_u32[2];
 } time_u64_t;
 
-
 extern ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
 extern ip6_main_t ip6_main;
 
index b332b31..620b70a 100644 (file)
@@ -1,5 +1,7 @@
 /*
  * Copyright (c) 2017 Cisco and/or its affiliates.
+ * trace_util.h -- Trace Profile Utility header
+ *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at:
@@ -26,7 +28,6 @@ typedef CLIB_PACKED(struct {
 }) ioam_trace_option_t;
 /* *INDENT-ON* */
 
-
 #endif /* PLUGINS_IOAM_PLUGIN_IOAM_ENCAP_IP6_IOAM_TRACE_H_ */
 
 /*
diff --git a/src/plugins/ioam/ip6/ioam_cache.api b/src/plugins/ioam/ip6/ioam_cache.api
new file mode 100644 (file)
index 0000000..de50d57
--- /dev/null
@@ -0,0 +1,37 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*  API to control ioam caching */
+
+define ioam_cache_ip6_enable_disable {
+    /* Client identifier, set from api_main.my_client_index */
+    u32 client_index;
+
+    /* Arbitrary context, so client can match reply to request */
+    u32 context;
+
+    /* Enable / disable the feature */
+    u8 is_disable;
+
+};
+
+define ioam_cache_ip6_enable_disable_reply {
+    /* From the request */
+    u32 context;
+
+    /* Return value, zero means all OK */
+    i32 retval;
+};
diff --git a/src/plugins/ioam/ip6/ioam_cache.c b/src/plugins/ioam/ip6/ioam_cache.c
new file mode 100644 (file)
index 0000000..9e90ff9
--- /dev/null
@@ -0,0 +1,386 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_cache.c - ioam ip6 API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/ip6/ioam_cache.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+#include "ioam_cache.h"
+
+/* define message IDs */
+#include <ioam/ip6/ioam_cache_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t)                                          \
+do {                                                            \
+    unix_shared_memory_queue_t * q =                            \
+    vl_api_client_index_to_input_queue (mp->client_index);      \
+    if (!q)                                                     \
+        return;                                                 \
+                                                                \
+    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
+    rmp->_vl_msg_id = ntohs((t)+cm->msg_id_base);               \
+    rmp->context = mp->context;                                 \
+    rmp->retval = ntohl(rv);                                    \
+                                                                \
+    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
+} while(0);
+
+
+/* List of message types that this plugin understands */
+
+#define foreach_ioam_cache_plugin_api_msg                        \
+_(IOAM_CACHE_IP6_ENABLE_DISABLE, ioam_cache_ip6_enable_disable)
+
+static u8 *
+ioam_e2e_id_trace_handler (u8 * s, ip6_hop_by_hop_option_t * opt)
+{
+  ioam_e2e_id_option_t *e2e = (ioam_e2e_id_option_t *) opt;
+
+  if (e2e)
+    {
+      s =
+       format (s, "IP6_HOP_BY_HOP E2E ID = %U\n", format_ip6_address,
+               &(e2e->id));
+    }
+
+
+  return s;
+}
+
+static u8 *
+ioam_e2e_cache_trace_handler (u8 * s, ip6_hop_by_hop_option_t * opt)
+{
+  ioam_e2e_cache_option_t *e2e = (ioam_e2e_cache_option_t *) opt;
+
+  if (e2e)
+    {
+      s =
+       format (s, "IP6_HOP_BY_HOP E2E CACHE = pool:%d idx:%d\n",
+               e2e->pool_id, e2e->pool_index);
+    }
+
+
+  return s;
+}
+
+/* Action function shared between message handler and debug CLI */
+int
+ioam_cache_ip6_enable_disable (ioam_cache_main_t * em, u8 is_disable)
+{
+  vlib_main_t *vm = em->vlib_main;
+
+  if (is_disable == 0)
+    {
+      ioam_cache_table_init (vm);
+      ip6_hbh_set_next_override (em->cache_hbh_slot);
+      ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID,
+                              0, ioam_e2e_id_trace_handler);
+      ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID,
+                              0, ioam_e2e_cache_trace_handler);
+
+    }
+  else
+    {
+      ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+      ioam_cache_table_destroy (vm);
+      ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+      ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID);
+    }
+
+  return 0;
+}
+
+/* Action function shared between message handler and debug CLI */
+int
+ioam_tunnel_select_ip6_enable_disable (ioam_cache_main_t * em,
+                                      u8 criteria,
+                                      u8 no_of_responses, u8 is_disable)
+{
+  vlib_main_t *vm = em->vlib_main;
+
+  if (is_disable == 0)
+    {
+      ioam_cache_ts_table_init (vm);
+      em->criteria_oneway = criteria;
+      em->wait_for_responses = no_of_responses;
+      ip6_hbh_set_next_override (em->ts_hbh_slot);
+      ip6_ioam_ts_cache_set_rewrite ();
+      ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID,
+                              0, ioam_e2e_id_trace_handler);
+      ip6_hbh_register_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID,
+                              0, ioam_e2e_cache_trace_handler);
+
+      /* Turn on the cleanup process */
+      //      vlib_process_signal_event (vm, em->cleanup_process_node_index, 1, 0);
+    }
+  else
+    {
+      ioam_cache_ts_timer_node_enable (vm, 0);
+      ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+      ioam_cache_ts_table_destroy (vm);
+      ip6_ioam_ts_cache_cleanup_rewrite ();
+      ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+      ip6_hbh_unregister_option (HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID);
+    }
+
+  return 0;
+}
+
+/* API message handler */
+static void vl_api_ioam_cache_ip6_enable_disable_t_handler
+  (vl_api_ioam_cache_ip6_enable_disable_t * mp)
+{
+  vl_api_ioam_cache_ip6_enable_disable_reply_t *rmp;
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  int rv;
+
+  rv = ioam_cache_ip6_enable_disable (cm, (int) (mp->is_disable));
+  REPLY_MACRO (VL_API_IOAM_CACHE_IP6_ENABLE_DISABLE_REPLY);
+}
+
+/* Set up the API message handling tables */
+static clib_error_t *
+ioam_cache_plugin_api_hookup (vlib_main_t * vm)
+{
+  ioam_cache_main_t *sm = &ioam_cache_main;
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base),     \
+                           #n,                                 \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_ioam_cache_plugin_api_msg;
+#undef _
+
+  return 0;
+}
+
+static clib_error_t *
+set_ioam_cache_command_fn (vlib_main_t * vm,
+                          unformat_input_t * input, vlib_cli_command_t * cmd)
+{
+  ioam_cache_main_t *em = &ioam_cache_main;
+  u8 is_disable = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "disable"))
+       is_disable = 1;
+      else
+       break;
+    }
+  ioam_cache_ip6_enable_disable (em, is_disable);
+
+  return 0;
+}
+
+/* *INDENT_OFF* */
+VLIB_CLI_COMMAND (set_ioam_cache_command, static) =
+{
+.path = "set ioam ip6 cache",.short_help =
+    "set ioam ip6 cache [disable]",.function = set_ioam_cache_command_fn};
+/* *INDENT_ON* */
+
+#define IOAM_TS_WAIT_FOR_RESPONSES 3
+static clib_error_t *
+set_ioam_tunnel_select_command_fn (vlib_main_t * vm,
+                                  unformat_input_t * input,
+                                  vlib_cli_command_t * cmd)
+{
+  ioam_cache_main_t *em = &ioam_cache_main;
+  u8 is_disable = 0;
+  u8 one_way = 0;
+  u8 no_of_responses = IOAM_TS_WAIT_FOR_RESPONSES;
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "disable"))
+       is_disable = 1;
+      else if (unformat (input, "rtt"))
+       one_way = 0;
+      else if (unformat (input, "oneway"))
+       one_way = 1;
+      else if (unformat (input, "wait_for_responses %d", &no_of_responses))
+       ;
+      else
+       break;
+    }
+
+  ioam_tunnel_select_ip6_enable_disable (em, one_way, no_of_responses,
+                                        is_disable);
+
+  return 0;
+}
+
+/* *INDENT_OFF* */
+VLIB_CLI_COMMAND (set_ioam_cache_ts_command, static) =
+{
+.path = "set ioam ip6 sr-tunnel-select",.short_help =
+    "set ioam ip6 sr-tunnel-select [disable] [oneway|rtt] [wait_for_responses <n|default 3>]",.function
+    = set_ioam_tunnel_select_command_fn};
+/* *INDENT_ON* */
+
+static void
+ioam_cache_table_print (vlib_main_t * vm, u8 verbose)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_entry_t *entry = 0;
+  ioam_cache_ts_entry_t *ts_entry = 0;
+  int no_of_threads = vec_len (vlib_worker_threads);
+  int i;
+
+  pool_foreach (entry, cm->ioam_rewrite_pool, (
+                                               {
+                                               vlib_cli_output (vm, "%U",
+                                                                format_ioam_cache_entry,
+                                                                entry);
+                                               }));
+
+  if (cm->ts_stats)
+    for (i = 0; i < no_of_threads; i++)
+      {
+	vlib_cli_output (vm, "Number of entries in thread-%d selection pool: %lu\n \
+                          (pool found to be full: %lu times)", i,
+			 cm->ts_stats[i].inuse, cm->ts_stats[i].add_failed);
+
+	if (verbose == 1)
+	  vlib_worker_thread_barrier_sync (vm);
+	pool_foreach (ts_entry, cm->ioam_ts_pool[i], (
+                                                      {
+                                                      vlib_cli_output (vm,
+                                                                       "%U",
+                                                                       format_ioam_cache_ts_entry,
+                                                                       ts_entry,
+                                                                       (u32)
+                                                                       i);
+                                                      }
+                     ));
+	if (verbose == 1) vlib_worker_thread_barrier_release (vm);	/* release only when synced above */
+      }
+
+}
+
+static clib_error_t *
+show_ioam_cache_command_fn (vlib_main_t * vm,
+                           unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
+{
+  u8 verbose = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "verbose"))
+       verbose = 1;
+    }
+  ioam_cache_table_print (vm, verbose);
+
+
+  return 0;
+}
+
+/* *INDENT_OFF* */
+VLIB_CLI_COMMAND (show_ioam_cache_command, static) =
+{
+.path = "show ioam ip6 cache",.short_help =
+    "show ioam ip6 cache [verbose]",.function = show_ioam_cache_command_fn};
+/* *INDENT_ON* */
+
+static clib_error_t *
+ioam_cache_init (vlib_main_t * vm)
+{
+  ioam_cache_main_t *em = &ioam_cache_main;
+  clib_error_t *error = 0;
+  u8 *name;
+  u32 cache_node_index = ioam_cache_node.index;
+  u32 ts_node_index = ioam_cache_ts_node.index;
+  vlib_node_t *ip6_hbyh_node = NULL, *ip6_hbh_pop_node = NULL, *error_node =
+    NULL;
+
+  name = format (0, "ioam_cache_%08x%c", api_version, 0);
+
+  memset (&ioam_cache_main, 0, sizeof (ioam_cache_main));
+  /* Ask for a correctly-sized block of API message decode slots */
+  em->msg_id_base = vl_msg_api_get_msg_ids
+    ((char *) name, VL_MSG_FIRST_AVAILABLE);
+
+  error = ioam_cache_plugin_api_hookup (vm);
+  /* Hook this node to ip6-hop-by-hop */
+  ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop");
+  em->cache_hbh_slot =
+    vlib_node_add_next (vm, ip6_hbyh_node->index, cache_node_index);
+  em->ts_hbh_slot =
+    vlib_node_add_next (vm, ip6_hbyh_node->index, ts_node_index);
+
+  ip6_hbh_pop_node = vlib_get_node_by_name (vm, (u8 *) "ip6-pop-hop-by-hop");
+  em->ip6_hbh_pop_node_index = ip6_hbh_pop_node->index;
+
+  error_node = vlib_get_node_by_name (vm, (u8 *) "error-drop");
+  em->error_node_index = error_node->index;
+  em->vlib_main = vm;
+
+  vec_free (name);
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ioam_cache_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ip6/ioam_cache.h b/src/plugins/ioam/ip6/ioam_cache.h
new file mode 100644 (file)
index 0000000..aa88d58
--- /dev/null
@@ -0,0 +1,897 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ioam_cache_h__
+#define __included_ioam_cache_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/sr/sr.h>
+
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+#include <vppinfra/bihash_8_8.h>
+#include <ioam/analyse/ip6/ip6_ioam_analyse.h>
+#include <vppinfra/tw_timer_16t_2w_512sl.h>
+/*
+ * ioam_cache.h
+ * This header contains routines for caching of ioam header and
+ * buffer:
+ * 1 - On application facing node: to cache ioam header recvd
+ *     in request and reattach in response to provide round
+ *     trip path visibility. Since request response matching
+ *     is needed works with TCP and relies on (5 tuples,seq no)
+ * 2 - On M-Anycast server node: This node replicates requests
+ *    towards multiple anycast service nodes serving anycast
+ *    IP6 address. It evaluates response and forwards the best
+ *    response towards the client requesting the service.
+ *    Again since request-response matching is needed, works
+ *    with TCP  and relies on (5 tuples,seq no) for matching.
+ *    To do this it caches SYN-ACK responses for a short time to
+ *    evaluate multiple responses received before the selected
+ *    SYN-ACK response is forwarded and others dropped.
+ *
+ * M-Anycast server cache:
+ *   - There is a pool of cache entries per worker thread.
+ *   - Cache entry is created when SYN is received expected
+ *     number of responses are marked based on number of
+ *     SR tunnels for the anycast destination address
+ *   - The pool/thread id and pool index are attached in the
+ *    message as an ioam option for quick look up.
+ *   - When a SYN-ACK is received the ioam option containing
+ *     thread id + pool index of the cache entry is used to
+ *     look up cache entry.
+ *   - Cache synchronization:
+ *      - This is achieved by cache entry add/del/update all handled
+ *        by the same worker/main thread
+ *      - Packets from client to threads - syn packets, can be distributed
+ *        based on incoming interface affinity to the cpu core pinned to
+ *        the thread or a simple sequence number based distribution
+ *        if thread per interface is not scaling
+ *      - Response packets from server towards clients - syn-acks, are
+ *        forced to the same thread that created the cache entry
+ *        using SR and the destination of SR v6 address assigned
+ *        to the core/thread. This address is sent as an ioam option
+ *        in the syn that can be then used on the other side to
+ *        populate v6 dst address in the response
+ *      - Timeout: timer wheel per thread is used to track the syn-ack wait
+ *        time. The timer wheel tick is updated via an input node per thread.
+ *
+ * Application facing node/Service side cache:
+ *  - Single pool of cache entries.
+ *  - Cache entry is created when SYN is received. Caches the ioam
+ *    header. Hash table entry is created based on 5 tuple and
+ *    TCP seq no to pool index
+ *  - Response SYN-ACK processed by looking up pool index in hash table
+ *    and cache entry in the pool is used to get the ioam header rewrite
+ *    string. Entry is freed from pool and hash table after use.
+ *  - Locking/Synchronization: Currently this functionality is deployed
+ *    with main/single thread only. Hence no locking is used.
+ *  - Deployment: A VPP node per application server servicing anycast
+ *    address is expected. Locking/synchronization needed when the server
+ *    /application facing node is started with multiple worker threads.
+ *
+ */
+
+/*
+ * Application facing server side caching:
+ * Cache entry for ioam header
+ * Currently caters to TCP and relies on
+ * TCP - 5 tuples + seqno to cache and reinsert
+ * ioam header b/n TCP request response
+ */
+typedef struct
+{
+  ip6_address_t src_address;
+  ip6_address_t dst_address;
+  u16 src_port;
+  u16 dst_port;
+  u8 protocol;
+  u32 seq_no;
+  ip6_address_t next_hop;
+  u16 my_address_offset;
+  u8 *ioam_rewrite_string;
+} ioam_cache_entry_t;
+
+/*
+ * Cache entry for anycast server selection
+ * Works for TCP as 5 tuple + sequence number
+ * is required for request response matching
+ * max_responses expected is set based on number
+ *              of SR tunnels for the dst_address
+ * Timeout or all response_received = max_responses
+ *            will clear the entry
+ * buffer_index index of the response msg vlib buffer
+ *           that is currently the best response
+ */
+typedef struct
+{
+  u32 pool_id;
+  u32 pool_index;
+  ip6_address_t src_address;
+  ip6_address_t dst_address;
+  u16 src_port;
+  u16 dst_port;
+  u8 protocol;
+  u32 seq_no;
+  u32 buffer_index;
+  ip6_hop_by_hop_header_t *hbh;        //pointer to hbh header in the buffer
+  u64 created_at;
+  u8 response_received;
+  u8 max_responses;
+  u32 stop_timer_handle;
+  /** Handle returned from tw_start_timer */
+  u32 timer_handle;
+  /** entry should expire at this clock tick */
+  u32 expected_to_expire;
+} ioam_cache_ts_entry_t;
+
+/*
+ * Per thread tunnel selection cache stats
+ */
+typedef struct
+{
+  u64 inuse;
+  u64 add_failed;
+} ioam_cache_ts_pool_stats_t;
+
+/* Server side: iOAM header caching */
+#define MAX_CACHE_ENTRIES 4096
+/* M-Anycast: Cache for SR tunnel selection */
+#define MAX_CACHE_TS_ENTRIES 1048576
+
+#define IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS (4 * 1024)
+#define IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE (2<<20)
+
+typedef struct
+{
+  /* API message ID base */
+  u16 msg_id_base;
+
+  /* Pool of ioam_cache_buffer_t */
+  ioam_cache_entry_t *ioam_rewrite_pool;
+
+  /* For steering packets ioam cache entry is followed by
+   * SR header. This is the SR rewrite template */
+  u8 *sr_rewrite_template;
+  /* The current rewrite string being used */
+  u8 *rewrite;
+  u8 rewrite_pool_index_offset;
+
+  u64 lookup_table_nbuckets;
+  u64 lookup_table_size;
+  clib_bihash_8_8_t ioam_rewrite_cache_table;
+
+  /* M-Anycast: Pool of ioam_cache_ts_entry_t per thread */
+  ioam_cache_ts_entry_t **ioam_ts_pool;
+  ioam_cache_ts_pool_stats_t *ts_stats;
+  /** per thread single-wheel */
+  tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
+
+  /*
+   * Selection criteria: oneway delay: Server to M-Anycast
+   * or RTT
+   */
+  bool criteria_oneway;
+  u8 wait_for_responses;
+
+  /* convenience */
+  vlib_main_t *vlib_main;
+
+  uword cache_hbh_slot;
+  uword ts_hbh_slot;
+  u32 ip6_hbh_pop_node_index;
+  u32 error_node_index;
+  u32 cleanup_process_node_index;
+} ioam_cache_main_t;
+
+ioam_cache_main_t ioam_cache_main;
+
+vlib_node_registration_t ioam_cache_node;
+vlib_node_registration_t ioam_cache_ts_node;
+
+/*  Compute flow hash.  We'll use it to select which Sponge to use for this
+ *  flow.  And other things.
+ *  ip6_compute_flow_hash in ip6.h doesn't locate tcp/udp when
+ *  ext headers are present. While it could be made to it will be a
+ *  performance hit for ECMP flows.
+ *  Hence this function here, with L4 information directly input
+ *  Useful when tcp/udp headers are already located in presence of
+ *  ext headers
+ */
+always_inline u32
+ip6_compute_flow_hash_ext (const ip6_header_t * ip,
+                          u8 protocol,
+                          u16 src_port,
+                          u16 dst_port, flow_hash_config_t flow_hash_config)
+{
+  u64 a, b, c;
+  u64 t1, t2;
+
+  t1 = (ip->src_address.as_u64[0] ^ ip->src_address.as_u64[1]);
+  t1 = (flow_hash_config & IP_FLOW_HASH_SRC_ADDR) ? t1 : 0;
+
+  t2 = (ip->dst_address.as_u64[0] ^ ip->dst_address.as_u64[1]);
+  t2 = (flow_hash_config & IP_FLOW_HASH_DST_ADDR) ? t2 : 0;
+
+  a = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t2 : t1;
+  b = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ? t1 : t2;
+  b ^= (flow_hash_config & IP_FLOW_HASH_PROTO) ? protocol : 0;
+
+  t1 = src_port;
+  t2 = dst_port;
+
+  t1 = (flow_hash_config & IP_FLOW_HASH_SRC_PORT) ? t1 : 0;
+  t2 = (flow_hash_config & IP_FLOW_HASH_DST_PORT) ? t2 : 0;
+
+  c = (flow_hash_config & IP_FLOW_HASH_REVERSE_SRC_DST) ?
+    ((t1 << 16) | t2) : ((t2 << 16) | t1);
+
+  hash_mix64 (a, b, c);
+  return (u32) c;
+}
+
+
+/* 2 new ioam E2E options :
+ * 1. HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID: IP6 address
+ *                of ioam node that inserted ioam header
+ * 2. HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID: Pool id and index
+ *                   to look up tunnel select cache entry
+ */
+#define HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID 30
+#define HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID 31
+
+typedef CLIB_PACKED (struct
+                    {
+                    ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 reserved[5];
+                    ip6_address_t id;
+                    }) ioam_e2e_id_option_t;
+
+typedef CLIB_PACKED (struct
+                    {
+                    ip6_hop_by_hop_option_t hdr; u8 e2e_type; u8 pool_id;
+                    u32 pool_index;
+                    }) ioam_e2e_cache_option_t;
+
+#define IOAM_E2E_ID_OPTION_RND ((sizeof(ioam_e2e_id_option_t) + 7) & ~7)
+#define IOAM_E2E_ID_HBH_EXT_LEN (IOAM_E2E_ID_OPTION_RND >> 3)
+#define IOAM_E2E_CACHE_OPTION_RND ((sizeof(ioam_e2e_cache_option_t) + 7) & ~7)
+#define IOAM_E2E_CACHE_HBH_EXT_LEN (IOAM_E2E_CACHE_OPTION_RND >> 3)
+
+/* Stamp the e2e ID option with the first IP6 address configured on the
+ * buffer's RX interface; leave the option untouched when the interface
+ * has no address. */
+static inline void
+ioam_e2e_id_rewrite_handler (ioam_e2e_id_option_t * e2e_option,
+                            vlib_buffer_t * b0)
+{
+  ip6_address_t *addr;
+
+  addr = ip6_interface_first_address (&ip6_main,
+                                     vnet_buffer (b0)->sw_if_index
+                                     [VLIB_RX]);
+  if (!addr)
+    return;
+
+  e2e_option->id.as_u64[0] = addr->as_u64[0];
+  e2e_option->id.as_u64[1] = addr->as_u64[1];
+}
+
+/* Following functions are for the caching of ioam header
+ * to enable reattaching it for a complete request-response
+ * message exchange */
+/* Return a rewrite-cache entry (and its rewrite string) to the pool.
+ * NULL entry is a no-op. */
+inline static void
+ioam_cache_entry_free (ioam_cache_entry_t * entry)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+
+  if (!entry)
+    return;
+
+  vec_free (entry->ioam_rewrite_string);
+  /* Scrub before handing the element back to the pool. */
+  memset (entry, 0, sizeof (*entry));
+  pool_put (cm->ioam_rewrite_pool, entry);
+}
+
+/* Free the rewrite-pool entry at pool_index; always yields 0 so the
+ * caller can clear its reference in one statement. */
+inline static ioam_cache_entry_t *
+ioam_cache_entry_cleanup (u32 pool_index)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+
+  ioam_cache_entry_free (pool_elt_at_index (cm->ioam_rewrite_pool,
+                                           pool_index));
+  return (0);
+}
+
+/* Look up (and, on a hit, unlink) the cached ioam rewrite for the
+ * response direction of a flow.  src_port/dst_port are in the
+ * response's orientation, so they are matched crossed against the
+ * cached request entry.  Returns the entry, or 0 when not found. */
+inline static ioam_cache_entry_t *
+ioam_cache_lookup (ip6_header_t * ip0, u16 src_port, u16 dst_port, u32 seq_no)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  /* REVERSE_SRC_DST makes the response hash to the same value the
+   * request was stored under in ioam_cache_add () */
+  u32 flow_hash = ip6_compute_flow_hash_ext (ip0, ip0->protocol,
+                                            src_port, dst_port,
+                                            IP_FLOW_HASH_DEFAULT |
+                                            IP_FLOW_HASH_REVERSE_SRC_DST);
+  clib_bihash_kv_8_8_t kv, value;
+
+  kv.key = (u64) flow_hash << 32 | seq_no;
+  kv.value = 0;
+  value.key = 0;
+  value.value = 0;
+
+  if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >=
+      0)
+    {
+      ioam_cache_entry_t *entry = 0;
+
+      entry = pool_elt_at_index (cm->ioam_rewrite_pool, value.value);
+      /* Full match: addresses and ports crossed, since this is the
+       * response to the cached request. */
+      if (ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 &&
+         ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 &&
+         entry->src_port == dst_port &&
+         entry->dst_port == src_port && entry->seq_no == seq_no)
+       {
+         /* If lookup is successful remove it from the hash; the caller
+          * now owns the entry and must free it. */
+         clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 0);
+         return (entry);
+       }
+      else
+       return (0);
+
+    }
+  return (0);
+}
+
+/*
+ * Caches ioam hbh header
+ * Extends the hbh header with option to contain IP6 address of the node
+ * that caches it
+ */
+/* Cache the packet's ioam hbh header, keyed by flow hash + seq_no, so
+ * it can be re-attached to the matching response.  The cached copy's
+ * e2e ID option is rewritten with this node's own address.
+ * Returns 0 on success, -1 when the header carries no e2e ID option.
+ * BUG FIX: the original allocated the pool entry and rewrite vector
+ * before checking for the e2e ID option and leaked both on the -1
+ * path; the option lookup now happens before any allocation. */
+inline static int
+ioam_cache_add (vlib_buffer_t * b0,
+               ip6_header_t * ip0,
+               u16 src_port,
+               u16 dst_port, ip6_hop_by_hop_header_t * hbh0, u32 seq_no)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_entry_t *entry = 0;
+  u32 rewrite_len = 0, e2e_id_offset = 0;
+  u32 pool_index = 0;
+  ioam_e2e_id_option_t *e2e = 0;
+
+  /* Without the originator's e2e ID option we cannot learn the next
+   * hop: bail out before allocating anything. */
+  e2e = ip6_ioam_find_hbh_option (hbh0, HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+  if (!e2e)
+    return (-1);
+
+  pool_get_aligned (cm->ioam_rewrite_pool, entry, CLIB_CACHE_LINE_BYTES);
+  memset (entry, 0, sizeof (*entry));
+  pool_index = entry - cm->ioam_rewrite_pool;
+
+  /* Key material matched by ioam_cache_lookup () on the response */
+  clib_memcpy (entry->dst_address.as_u64, ip0->dst_address.as_u64,
+              sizeof (ip6_address_t));
+  clib_memcpy (entry->src_address.as_u64, ip0->src_address.as_u64,
+              sizeof (ip6_address_t));
+  entry->src_port = src_port;
+  entry->dst_port = dst_port;
+  entry->seq_no = seq_no;
+  entry->next_hop.as_u64[0] = e2e->id.as_u64[0];
+  entry->next_hop.as_u64[1] = e2e->id.as_u64[1];
+
+  /* Snapshot the whole hbh header (length is in 8-octet units,
+   * excluding the first 8 octets) for later re-attachment. */
+  rewrite_len = ((hbh0->length + 1) << 3);
+  vec_validate (entry->ioam_rewrite_string, rewrite_len - 1);
+  e2e_id_offset = (u8 *) e2e - (u8 *) hbh0;
+  /* setup e2e id option to insert v6 address of the node caching it */
+  clib_memcpy (entry->ioam_rewrite_string, hbh0, rewrite_len);
+  hbh0 = (ip6_hop_by_hop_header_t *) entry->ioam_rewrite_string;
+
+  /* Overwrite the cached copy's e2e ID option with this node's own
+   * address and remember where that address lives in the rewrite. */
+  e2e = (ioam_e2e_id_option_t *) (entry->ioam_rewrite_string + e2e_id_offset);
+  ioam_e2e_id_rewrite_handler (e2e, b0);
+  entry->my_address_offset = (u8 *) (&e2e->id) - (u8 *) hbh0;
+
+  /* add it to hash, replacing and freeing any collision for now */
+  u32 flow_hash =
+    ip6_compute_flow_hash_ext (ip0, hbh0->protocol, src_port, dst_port,
+                              IP_FLOW_HASH_DEFAULT);
+  clib_bihash_kv_8_8_t kv, value;
+  kv.key = (u64) flow_hash << 32 | seq_no;
+  kv.value = 0;
+  if (clib_bihash_search_8_8 (&cm->ioam_rewrite_cache_table, &kv, &value) >=
+      0)
+    {
+      /* replace */
+      ioam_cache_entry_cleanup (value.value);
+    }
+  kv.value = pool_index;
+  clib_bihash_add_del_8_8 (&cm->ioam_rewrite_cache_table, &kv, 1);
+  return (0);
+}
+
+/* Creates SR rewrite string
+ * This is appended with ioam header on the server facing
+ * node.
+ * This SR header is necessary to attract packets towards
+ * selected Anycast server.
+ */
+/* Build the SR rewrite template that is appended (with the ioam
+ * header) on the server-facing node to steer packets towards the
+ * selected Anycast server. */
+inline static void
+ioam_cache_sr_rewrite_template_create (void)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ip6_address_t *seg_list = 0;
+  ip6_address_t *seg = 0;
+
+  /* Two placeholder segments, 0xfe-filled: this node's address and the
+   * original destination are filled in at packet-processing time. */
+  vec_add2 (seg_list, seg, 2);
+  memset (seg, 0xfe, 2 * sizeof (ip6_address_t));
+  cm->sr_rewrite_template = ip6_compute_rewrite_string_insert (seg_list);
+  vec_free (seg_list);
+}
+
+/* Allocate the rewrite-cache pool and lookup bihash, and build the SR
+ * rewrite template.  Returns 1. */
+inline static int
+ioam_cache_table_init (vlib_main_t * vm)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+
+  pool_alloc_aligned (cm->ioam_rewrite_pool,
+                     MAX_CACHE_ENTRIES, CLIB_CACHE_LINE_BYTES);
+  /* Round the bucket count up to a power of two for the bihash */
+  cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS;
+  cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets);
+  cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE;
+
+  clib_bihash_init_8_8 (&cm->ioam_rewrite_cache_table,
+                       "ioam rewrite cache table",
+                       cm->lookup_table_nbuckets, cm->lookup_table_size);
+  /* Create SR rewrite template */
+  ioam_cache_sr_rewrite_template_create ();
+  return (1);
+}
+
+/* Undo ioam_cache_table_init (): free every cached entry, the pool,
+ * the lookup bihash and the SR rewrite template.  Returns 0. */
+inline static int
+ioam_cache_table_destroy (vlib_main_t * vm)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_entry_t *entry = 0;
+  /* free pool and hash table */
+  clib_bihash_free_8_8 (&cm->ioam_rewrite_cache_table);
+  pool_foreach (entry, cm->ioam_rewrite_pool, (
+                                               {
+                                               ioam_cache_entry_free (entry);
+                                               }));
+  pool_free (cm->ioam_rewrite_pool);
+  cm->ioam_rewrite_pool = 0;
+  vec_free (cm->sr_rewrite_template);
+  cm->sr_rewrite_template = 0;
+  return (0);
+}
+
+/* format function: one rewrite-cache entry — pool index, 5-tuple,
+ * seq_no and the cached hop-by-hop header. */
+inline static u8 *
+format_ioam_cache_entry (u8 * s, va_list * args)
+{
+  ioam_cache_entry_t *e = va_arg (*args, ioam_cache_entry_t *);
+  ioam_cache_main_t *cm = &ioam_cache_main;
+
+  s = format (s, "%d: %U:%d to  %U:%d seq_no %lu\n",
+             (e - cm->ioam_rewrite_pool),
+             format_ip6_address, &e->src_address,
+             e->src_port,
+             format_ip6_address, &e->dst_address, e->dst_port, e->seq_no);
+  s = format (s, "  %U",
+             format_ip6_hop_by_hop_ext_hdr,
+             (ip6_hop_by_hop_header_t *) e->ioam_rewrite_string,
+             vec_len (e->ioam_rewrite_string) - 1);
+  return s;
+}
+
+void ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable);
+
+#define IOAM_CACHE_TS_TIMEOUT 1.0      //SYN timeout 1 sec
+#define IOAM_CACHE_TS_TICK 100e-3
+/* Timer delays as multiples of 100ms */
+#define IOAM_CACHE_TS_TIMEOUT_TICKS IOAM_CACHE_TS_TICK*9
+#define TIMER_HANDLE_INVALID ((u32) ~0)
+
+
+void expired_cache_ts_timer_callback (u32 * expired_timers);
+
+/*
+ * Following functions are to manage the M-Anycast server selection
+ * cache.
+ * There is a per-worker-thread pool to create a cache entry
+ * for a TCP SYN received. The TCP SYN-ACK contains an ioam header
+ * with the HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID option pointing to the
+ * entry.
+ */
+/* Per-worker init for the tunnel-select cache: an entry pool, a stats
+ * record and a 100ms-tick timer wheel per thread; also enables the
+ * timer process node.  Returns 1. */
+inline static int
+ioam_cache_ts_table_init (vlib_main_t * vm)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  int no_of_threads = vec_len (vlib_worker_threads);
+  int i;
+
+  vec_validate_aligned (cm->ioam_ts_pool, no_of_threads - 1,
+                       CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (cm->ts_stats, no_of_threads - 1,
+                       CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned (cm->timer_wheels, no_of_threads - 1,
+                       CLIB_CACHE_LINE_BYTES);
+  /* Round the bucket count up to a power of two for the bihash */
+  cm->lookup_table_nbuckets = IOAM_CACHE_TABLE_DEFAULT_HASH_NUM_BUCKETS;
+  cm->lookup_table_nbuckets = 1 << max_log2 (cm->lookup_table_nbuckets);
+  cm->lookup_table_size = IOAM_CACHE_TABLE_DEFAULT_HASH_MEMORY_SIZE;
+  for (i = 0; i < no_of_threads; i++)
+    {
+      pool_alloc_aligned (cm->ioam_ts_pool[i],
+                         MAX_CACHE_TS_ENTRIES, CLIB_CACHE_LINE_BYTES);
+      memset (&cm->ts_stats[i], 0, sizeof (ioam_cache_ts_pool_stats_t));
+      /* Expired timers are delivered to expired_cache_ts_timer_callback */
+      tw_timer_wheel_init_16t_2w_512sl (&cm->timer_wheels[i],
+                                       expired_cache_ts_timer_callback,
+                                       IOAM_CACHE_TS_TICK
+                                       /* timer period 100ms */ ,
+                                       10e4);
+      cm->timer_wheels[i].last_run_time = vlib_time_now (vm);
+    }
+  ioam_cache_ts_timer_node_enable (vm, 1);
+  return (1);
+}
+
+/* Arm the entry's timeout on its home thread's timer wheel; interval
+ * is in wheel ticks.  The started-timer handle is kept on the entry
+ * so it can later be stopped. */
+always_inline void
+ioam_cache_ts_timer_set (ioam_cache_main_t * cm,
+                        ioam_cache_ts_entry_t * entry, u32 interval)
+{
+  u32 handle;
+
+  handle = tw_timer_start_16t_2w_512sl (&cm->timer_wheels[entry->pool_id],
+                                       entry->pool_index,
+                                       1 /* timer id */ ,
+                                       interval);
+  entry->timer_handle = handle;
+}
+
+/* Cancel the entry's pending timeout and invalidate its handle. */
+always_inline void
+ioam_cache_ts_timer_reset (ioam_cache_main_t * cm,
+                          ioam_cache_ts_entry_t * entry)
+{
+  u32 handle = entry->timer_handle;
+
+  entry->timer_handle = TIMER_HANDLE_INVALID;
+  tw_timer_stop_16t_2w_512sl (&cm->timer_wheels[entry->pool_id], handle);
+}
+
+/* Release a tunnel-select cache entry.  If the entry still holds a
+ * cached buffer, that buffer is handed to node_index (the error node
+ * to drop it, or ip6-pop-hop-by-hop to send it).
+ * BUG FIX: the original memset the element *after* pool_put (), i.e.
+ * wrote to an element already returned to the pool; scrub first, then
+ * put — matching ioam_cache_entry_free (). */
+inline static void
+ioam_cache_ts_entry_free (u32 thread_id,
+                         ioam_cache_ts_entry_t * entry, u32 node_index)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  vlib_main_t *vm = cm->vlib_main;
+  vlib_frame_t *nf = 0;
+  u32 *to_next;
+
+  if (entry)
+    {
+      if (entry->hbh != 0)
+       {
+         /* Flush the cached buffer out through the requested node */
+         nf = vlib_get_frame_to_node (vm, node_index);
+         nf->n_vectors = 0;
+         to_next = vlib_frame_vector_args (nf);
+         nf->n_vectors = 1;
+         to_next[0] = entry->buffer_index;
+         vlib_put_frame_to_node (vm, node_index, nf);
+       }
+      memset (entry, 0, sizeof (*entry));
+      pool_put (cm->ioam_ts_pool[thread_id], entry);
+      cm->ts_stats[thread_id].inuse--;
+    }
+}
+
+/* Tear down all per-thread tunnel-select pools and timer wheels
+ * created by ioam_cache_ts_table_init ().  Returns 0.
+ * BUG FIX: the original executed "cm->ioam_ts_pool = 0;" inside the
+ * loop, nulling the per-thread pool *vector* itself — a NULL
+ * dereference on the next iteration with more than one thread, and a
+ * leak since the later vec_free () then freed nothing.  Clear only
+ * this thread's slot inside the loop. */
+inline static int
+ioam_cache_ts_table_destroy (vlib_main_t * vm)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_ts_entry_t *entry = 0;
+  int no_of_threads = vec_len (vlib_worker_threads);
+  int i;
+
+  /* Drain and free each per-thread pool and its timer wheel */
+  for (i = 0; i < no_of_threads; i++)
+    {
+      pool_foreach (entry, cm->ioam_ts_pool[i], (
+                                                 {
+                                                 ioam_cache_ts_entry_free (i,
+                                                                           entry,
+                                                                           cm->error_node_index);
+                                                 }
+                   ));
+      pool_free (cm->ioam_ts_pool[i]);
+      cm->ioam_ts_pool[i] = 0;
+      tw_timer_wheel_free_16t_2w_512sl (&cm->timer_wheels[i]);
+    }
+  vec_free (cm->ioam_ts_pool);
+  cm->ioam_ts_pool = 0;
+  return (0);
+}
+
+inline static int
+ioam_cache_ts_entry_cleanup (u32 thread_id, u32 pool_index)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_ts_entry_t *entry = 0;
+
+  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+  ioam_cache_ts_entry_free (thread_id, entry, cm->error_node_index);
+  return (0);
+}
+
+/*
+ * Caches buffer for ioam SR tunnel select for Anycast service
+ */
+/* Allocate and key a tunnel-select entry for a TCP SYN on this
+ * thread's pool and arm its timeout.  On success *pool_index names the
+ * new entry and 0 is returned; -1 when the pool is full (counted in
+ * ts_stats.add_failed).  The response buffer/hbh are attached later by
+ * ioam_cache_ts_update (). */
+inline static int
+ioam_cache_ts_add (ip6_header_t * ip0,
+                  u16 src_port,
+                  u16 dst_port,
+                  u32 seq_no,
+                  u8 max_responses, u64 now, u32 thread_id, u32 * pool_index)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_ts_entry_t *entry = 0;
+
+  if (cm->ts_stats[thread_id].inuse == MAX_CACHE_TS_ENTRIES)
+    {
+      cm->ts_stats[thread_id].add_failed++;
+      return (-1);
+    }
+
+  pool_get_aligned (cm->ioam_ts_pool[thread_id], entry,
+                   CLIB_CACHE_LINE_BYTES);
+  memset (entry, 0, sizeof (*entry));
+  *pool_index = entry - cm->ioam_ts_pool[thread_id];
+
+  /* Key material matched by ioam_cache_ts_lookup () on the response */
+  clib_memcpy (entry->dst_address.as_u64, ip0->dst_address.as_u64,
+              sizeof (ip6_address_t));
+  clib_memcpy (entry->src_address.as_u64, ip0->src_address.as_u64,
+              sizeof (ip6_address_t));
+  entry->src_port = src_port;
+  entry->dst_port = dst_port;
+  entry->seq_no = seq_no;
+  entry->response_received = 0;
+  entry->max_responses = max_responses;
+  entry->created_at = now;
+  entry->hbh = 0;
+  entry->buffer_index = 0;
+  /* pool_id/pool_index are echoed in the packet's e2e cache-id option
+   * so the response can find this entry */
+  entry->pool_id = thread_id;
+  entry->pool_index = *pool_index;
+  ioam_cache_ts_timer_set (cm, entry, IOAM_CACHE_TS_TIMEOUT);
+  cm->ts_stats[thread_id].inuse++;
+  return (0);
+}
+
+/* Re-inject the cached buffer through the ip6 hbh-pop node and free
+ * the entry; no-op when the pool slot is already free. */
+inline static void
+ioam_cache_ts_send (u32 thread_id, i32 pool_index)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_ts_entry_t *e;
+
+  e = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+  if (pool_is_free (cm->ioam_ts_pool[thread_id], e) || !e)
+    return;
+
+  /* send and free pool entry */
+  ioam_cache_ts_entry_free (thread_id, e, cm->ip6_hbh_pop_node_index);
+}
+
+/* Send the cached (best-so-far) response out once either the expected
+ * number of responses has been seen or the entry has aged past
+ * IOAM_CACHE_TS_TIMEOUT; otherwise leave it cached. */
+inline static void
+ioam_cache_ts_check_and_send (u32 thread_id, i32 pool_index)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_ts_entry_t *entry = 0;
+  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+  if (entry && entry->hbh)
+    {
+      if (entry->response_received == entry->max_responses ||
+         entry->created_at + IOAM_CACHE_TS_TIMEOUT <=
+         vlib_time_now (cm->vlib_main))
+       {
+         /* Timer must be stopped before the entry is freed by
+          * ioam_cache_ts_send () */
+         ioam_cache_ts_timer_reset (cm, entry);
+         ioam_cache_ts_send (thread_id, pool_index);
+       }
+    }
+}
+
+/* Attach (or replace) the buffered response held by a ts entry.  Any
+ * previously cached buffer is dropped via the error node, then the
+ * send condition is re-evaluated.  Returns 0, or -1 when the pool
+ * slot is free. */
+inline static int
+ioam_cache_ts_update (u32 thread_id,
+                     i32 pool_index,
+                     u32 buffer_index, ip6_hop_by_hop_header_t * hbh)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_cache_ts_entry_t *entry = 0;
+  vlib_main_t *vm = cm->vlib_main;
+  vlib_frame_t *nf = 0;
+  u32 *to_next;
+
+  entry = pool_elt_at_index (cm->ioam_ts_pool[thread_id], pool_index);
+  if (!pool_is_free (cm->ioam_ts_pool[thread_id], entry) && entry)
+    {
+      /* drop existing buffer */
+      if (entry->hbh != 0)
+       {
+         nf = vlib_get_frame_to_node (vm, cm->error_node_index);
+         nf->n_vectors = 0;
+         to_next = vlib_frame_vector_args (nf);
+         nf->n_vectors = 1;
+         to_next[0] = entry->buffer_index;
+         vlib_put_frame_to_node (vm, cm->error_node_index, nf);
+       }
+      /* update */
+      entry->buffer_index = buffer_index;
+      entry->hbh = hbh;
+      /* check and send */
+      ioam_cache_ts_check_and_send (thread_id, pool_index);
+      return (0);
+    }
+  return (-1);
+}
+
+/*
+ * Looks up the entry based on the e2e option pool index.
+ * result = 0: found the entry
+ * result < 0: indicates failure to find an entry
+ */
+inline static int
+ioam_cache_ts_lookup (ip6_header_t * ip0,
+                     u8 protocol,
+                     u16 src_port,
+                     u16 dst_port,
+                     u32 seq_no,
+                     ip6_hop_by_hop_header_t ** hbh,
+                     u32 * pool_index, u8 * thread_id, u8 response_seen)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ip6_hop_by_hop_header_t *hbh0 = 0;
+  ioam_e2e_cache_option_t *e2e = 0;
+
+  /* The e2e cache-id option sits at a fixed offset set up by
+   * ip6_ioam_ts_cache_set_rewrite () inside the hbh header */
+  hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+  e2e =
+    (ioam_e2e_cache_option_t *) ((u8 *) hbh0 + cm->rewrite_pool_index_offset);
+  /* Bounds + type check before trusting the option's pool pointers */
+  if ((u8 *) e2e < ((u8 *) hbh0 + ((hbh0->length + 1) << 3))
+      && e2e->hdr.type == HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID)
+    {
+      ioam_cache_ts_entry_t *entry = 0;
+      *pool_index = e2e->pool_index;
+      *thread_id = e2e->pool_id;
+      entry = pool_elt_at_index (cm->ioam_ts_pool[*thread_id], *pool_index);
+      /* Full match: addresses and ports crossed, since this is the
+       * response direction */
+      if (entry &&
+         ip6_address_compare (&ip0->src_address, &entry->dst_address) == 0 &&
+         ip6_address_compare (&ip0->dst_address, &entry->src_address) == 0 &&
+         entry->src_port == dst_port &&
+         entry->dst_port == src_port && entry->seq_no == seq_no)
+       {
+         *hbh = entry->hbh;
+         /* response_seen (0 or 1) bumps the per-entry response count */
+         entry->response_received += response_seen;
+         return (0);
+       }
+      else if (entry)
+       {
+         return (-1);
+       }
+    }
+  return (-1);
+}
+
+/* format function: one tunnel-select entry — 5-tuple, seq_no, cached
+ * buffer index, the e2e node ID when a hbh header is cached, entry age
+ * and responses seen. */
+inline static u8 *
+format_ioam_cache_ts_entry (u8 * s, va_list * args)
+{
+  ioam_cache_ts_entry_t *e = va_arg (*args, ioam_cache_ts_entry_t *);
+  u32 thread_id = va_arg (*args, u32);
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ioam_e2e_id_option_t *e2e = 0;
+  vlib_main_t *vm = cm->vlib_main;
+  clib_time_t *ct = &vm->clib_time;
+
+  if (e && e->hbh)
+    {
+      e2e =
+       ip6_ioam_find_hbh_option (e->hbh,
+                                 HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID);
+
+      s =
+       format (s,
+               "%d: %U:%d to  %U:%d seq_no %u buffer %u %U \n\t\tCreated at %U Received %d\n",
+               (e - cm->ioam_ts_pool[thread_id]), format_ip6_address,
+               &e->src_address, e->src_port, format_ip6_address,
+               &e->dst_address, e->dst_port, e->seq_no, e->buffer_index,
+               format_ip6_address, e2e ? &e2e->id : 0, format_time_interval,
+               "h:m:s:u",
+               (e->created_at -
+                vm->cpu_time_main_loop_start) * ct->seconds_per_clock,
+               e->response_received);
+    }
+  else
+    {
+      /* No hbh cached yet: same line, minus the e2e node ID */
+      s =
+       format (s,
+               "%d: %U:%d to  %U:%d seq_no %u Buffer %u \n\t\tCreated at %U Received %d\n",
+               (e - cm->ioam_ts_pool[thread_id]), format_ip6_address,
+               &e->src_address, e->src_port, format_ip6_address,
+               &e->dst_address, e->dst_port, e->seq_no, e->buffer_index,
+               format_time_interval, "h:m:s:u",
+               (e->created_at -
+                vm->cpu_time_main_loop_start) * ct->seconds_per_clock,
+               e->response_received);
+    }
+  return s;
+}
+
+/*
+ * Get extended rewrite string for iOAM data in v6
+ * This makes space for an e2e options to carry cache pool info
+ * and manycast server address.
+ * It set the rewrite string per configs in ioam ip6 + new option
+ * for cache along with offset to the option to populate cache
+ * pool id and index
+ */
+static inline int
+ip6_ioam_ts_cache_set_rewrite (void)
+{
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  ip6_hop_by_hop_header_t *hbh;
+  u32 rewrite_len = 0;
+  ioam_e2e_cache_option_t *e2e = 0;
+  ioam_e2e_id_option_t *e2e_id = 0;
+
+  /* Start from the configured ioam rewrite (trace/pot/seqno) ... */
+  vec_free (cm->rewrite);
+  ip6_ioam_set_rewrite (&(cm->rewrite), hm->has_trace_option,
+                       hm->has_pot_option, hm->has_seqno_option);
+  hbh = (ip6_hop_by_hop_header_t *) cm->rewrite;
+  rewrite_len = ((hbh->length + 1) << 3);
+  /* ... and grow it by the two (8-byte padded) e2e options */
+  vec_validate (cm->rewrite,
+               rewrite_len - 1 + IOAM_E2E_CACHE_OPTION_RND +
+               IOAM_E2E_ID_OPTION_RND);
+  /* vec_validate may reallocate: re-derive hbh */
+  hbh = (ip6_hop_by_hop_header_t *) cm->rewrite;
+  /* setup e2e id option to insert pool id and index of the node caching it */
+  hbh->length += IOAM_E2E_CACHE_HBH_EXT_LEN + IOAM_E2E_ID_HBH_EXT_LEN;
+  /* Remember where the cache-id option lives; ioam_cache_ts_lookup ()
+   * relies on this fixed offset */
+  cm->rewrite_pool_index_offset = rewrite_len;
+  e2e = (ioam_e2e_cache_option_t *) (cm->rewrite + rewrite_len);
+  e2e->hdr.type = HBH_OPTION_TYPE_IOAM_E2E_CACHE_ID
+    | HBH_OPTION_TYPE_SKIP_UNKNOWN;
+  e2e->hdr.length = sizeof (ioam_e2e_cache_option_t) -
+    sizeof (ip6_hop_by_hop_option_t);
+  e2e->e2e_type = 2;
+  e2e_id =
+    (ioam_e2e_id_option_t *) ((u8 *) e2e + sizeof (ioam_e2e_cache_option_t));
+  e2e_id->hdr.type =
+    HBH_OPTION_TYPE_IOAM_EDGE_TO_EDGE_ID | HBH_OPTION_TYPE_SKIP_UNKNOWN;
+  e2e_id->hdr.length =
+    sizeof (ioam_e2e_id_option_t) - sizeof (ip6_hop_by_hop_option_t);
+  e2e_id->e2e_type = 1;
+
+  return (0);
+}
+
+/* Discard the rewrite template built by ip6_ioam_ts_cache_set_rewrite
+ * and invalidate the cached option offset.  Returns 0. */
+static inline int
+ip6_ioam_ts_cache_cleanup_rewrite (void)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+
+  cm->rewrite_pool_index_offset = 0;
+  vec_free (cm->rewrite);
+  cm->rewrite = 0;
+  return (0);
+}
+#endif /* __included_ioam_cache_h__ */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ip6/ioam_cache_all_api_h.h b/src/plugins/ioam/ip6/ioam_cache_all_api_h.h
new file mode 100644 (file)
index 0000000..61272a5
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/ip6/ioam_cache.api.h>
diff --git a/src/plugins/ioam/ip6/ioam_cache_msg_enum.h b/src/plugins/ioam/ip6/ioam_cache_msg_enum.h
new file mode 100644 (file)
index 0000000..8afd067
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_ioam_cache_msg_enum_h
+#define included_ioam_cache_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/ip6/ioam_cache_all_api_h.h>
+    /* We'll want to know how many messages IDs we need... */
+    VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_ioam_cache_msg_enum_h */
diff --git a/src/plugins/ioam/ip6/ioam_cache_node.c b/src/plugins/ioam/ip6/ioam_cache_node.c
new file mode 100644 (file)
index 0000000..6c8a038
--- /dev/null
@@ -0,0 +1,424 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * This file implements caching of ioam header and reattaching
+ * it in response message by performing request-response matching.
+ *  Works for TCP SYN/SYN-ACK.
+ * This feature is used for anycast server selection.
+ * ioam data thus cached is used to measure and get complete round trip
+ * network path to help in server selection.
+ * There are 2 graph nodes defined to :
+ * 1. process packets that contain iOAM header and cache it
+ * 2. process TCP SYN-ACKs and reattach ioam header from the
+ *    cache corresponding to TCP-SYN
+ * These graph nodes are attached to the vnet graph based on
+ * ioam cache and classifier configs.
+ * e.g.
+ * If db06::06 is the anycast service IP6 address:
+ *
+ * set ioam ip6 cache
+ *
+ * Apply this classifier on interface where requests for anycast service are received:
+ * classify session acl-hit-next ip6-node ip6-lookup table-index 0 match l3 ip6 dst db06::06
+ *    ioam-decap anycast <<< ioam-decap is hooked to cache when set ioam ip6 cache is enabled
+ *
+ * Apply this classifier on interface where responses from anycast service are received:
+ * classify session acl-hit-next ip6-node ip6-add-from-cache-hop-by-hop table-index 0 match l3
+ *    ip6 src db06::06 ioam-encap anycast-response
+ *
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <ioam/ip6/ioam_cache.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+typedef struct
+{
+  u32 next_index;
+  u32 flow_label;
+} cache_trace_t;
+
+/* packet trace format function for the ip6-ioam-cache node */
+static u8 *
+format_cache_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  cache_trace_t *t = va_arg (*args, cache_trace_t *);
+
+  return format (s, "CACHE: flow_label %d, next index %d",
+                t->flow_label, t->next_index);
+}
+
+vlib_node_registration_t ioam_cache_node;
+
+#define foreach_cache_error \
+_(RECORDED, "ip6 iOAM headers cached")
+
+typedef enum
+{
+#define _(sym,str) CACHE_ERROR_##sym,
+  foreach_cache_error
+#undef _
+    CACHE_N_ERROR,
+} cache_error_t;
+
+static char *cache_error_strings[] = {
+#define _(sym,string) string,
+  foreach_cache_error
+#undef _
+};
+
+typedef enum
+{
+  IOAM_CACHE_NEXT_POP_HBYH,
+  IOAM_CACHE_N_NEXT,
+} cache_next_t;
+
+/* ip6-ioam-cache node: for each buffer, if it is a plain TCP SYN
+ * (SYN set, ACK clear) cache its ioam hop-by-hop header via
+ * ioam_cache_add ().  All buffers then continue to ip6-pop-hop-by-hop
+ * to have the hbh header stripped. */
+static uword
+ip6_ioam_cache_node_fn (vlib_main_t * vm,
+                       vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  u32 n_left_from, *from, *to_next;
+  cache_next_t next_index;
+  u32 recorded = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      // TODO: Dual loop
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *p0;
+         u32 next0 = IOAM_CACHE_NEXT_POP_HBYH;
+         ip6_header_t *ip0;
+         ip6_hop_by_hop_header_t *hbh0;
+         tcp_header_t *tcp0;
+         u32 tcp_offset0;
+
+         /* speculatively enqueue p0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         p0 = vlib_get_buffer (vm, bi0);
+         ip0 = vlib_buffer_get_current (p0);
+         /* ip6_locate_header walks extension headers to find TCP */
+         if (IP_PROTOCOL_TCP ==
+             ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+           {
+             tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+             if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+                 (tcp0->flags & TCP_FLAG_ACK) == 0)
+               {
+                 /* Cache the ioam hbh header, keyed so the SYN-ACK
+                  * (seq_no + 1, ports/addresses crossed) matches */
+                 hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+                 if (0 == ioam_cache_add (p0,
+                                          ip0,
+                                          clib_net_to_host_u16
+                                          (tcp0->src_port),
+                                          clib_net_to_host_u16
+                                          (tcp0->dst_port), hbh0,
+                                          clib_net_to_host_u32
+                                          (tcp0->seq_number) + 1))
+                   {
+                     recorded++;
+                   }
+               }
+           }
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+           {
+             if (p0->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 cache_trace_t *t =
+                   vlib_add_trace (vm, node, p0, sizeof (*t));
+                 t->flow_label =
+                   clib_net_to_host_u32
+                   (ip0->ip_version_traffic_class_and_flow_label);
+                 t->next_index = next0;
+               }
+           }
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ioam_cache_node.index,
+                              CACHE_ERROR_RECORDED, recorded);
+  return frame->n_vectors;
+}
+
+/*
+ * Node for IP6 iOAM header cache
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_node) =
+{
+  .function = ip6_ioam_cache_node_fn,
+  .name = "ip6-ioam-cache",
+  .vector_size = sizeof (u32),
+  .format_trace = format_cache_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (cache_error_strings),
+  .error_strings = cache_error_strings,
+  .n_next_nodes = IOAM_CACHE_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes =
+  {
+    /* every packet proceeds to have its hbh header popped */
+    [IOAM_CACHE_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
+  },
+};
+/* *INDENT-ON* */
+
+typedef struct
+{
+  u32 next_index;
+} ip6_add_from_cache_hbh_trace_t;
+
+/* packet trace format function for ip6-add-from-cache-hop-by-hop */
+static u8 *
+format_ip6_add_from_cache_hbh_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_add_from_cache_hbh_trace_t *t =
+    va_arg (*args, ip6_add_from_cache_hbh_trace_t *);
+
+  return format (s, "IP6_ADD_FROM_CACHE_HBH: next index %d", t->next_index);
+}
+
+vlib_node_registration_t ip6_add_from_cache_hbh_node;
+
+/* Counter strings for the ip6-add-from-cache-hop-by-hop node */
+#define foreach_ip6_add_from_cache_hbh_error \
+_(PROCESSED, "Pkts w/ added ip6 hop-by-hop options")
+
+typedef enum
+{
+#define _(sym,str) IP6_ADD_FROM_CACHE_HBH_ERROR_##sym,
+  foreach_ip6_add_from_cache_hbh_error
+#undef _
+    IP6_ADD_FROM_CACHE_HBH_N_ERROR,
+} ip6_add_from_cache_hbh_error_t;
+
+static char *ip6_add_from_cache_hbh_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ip6_add_from_cache_hbh_error
+#undef _
+};
+
+/* Next nodes: rewritten packets continue to ip6-lookup, failures drop */
+#define foreach_ip6_ioam_cache_input_next        \
+  _(IP6_LOOKUP, "ip6-lookup")                   \
+  _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_IOAM_CACHE_INPUT_NEXT_##s,
+  foreach_ip6_ioam_cache_input_next
+#undef _
+    IP6_IOAM_CACHE_INPUT_N_NEXT,
+} ip6_ioam_cache_input_next_t;
+
+
+/*
+ * For TCP SYN+ACK or RST packets that match a cache entry (keyed on
+ * addresses/ports/ack number), re-inserts the previously cached iOAM
+ * hop-by-hop header plus an SR header built from cm->sr_rewrite_template,
+ * then forwards to ip6-lookup.  Packets that match SYN+ACK/RST but have
+ * no cache entry are dropped; all other packets pass through unchanged.
+ */
+static uword
+ip6_add_from_cache_hbh_node_fn (vlib_main_t * vm,
+                               vlib_node_runtime_t * node,
+                               vlib_frame_t * frame)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 n_left_from, *from, *to_next;
+  /* NOTE(review): declared as ip_lookup_next_t but holds this node's
+     next-frame index — confirm the type is intentional */
+  ip_lookup_next_t next_index;
+  u32 processed = 0;
+  u8 *rewrite = 0;
+  u32 rewrite_len = 0;
+  u32 sr_rewrite_len = vec_len (cm->sr_rewrite_template);
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      // TODO: Dual loop
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         ip6_header_t *ip0;
+         ip6_hop_by_hop_header_t *hbh0;
+         ip6_sr_header_t *srh0 = 0;
+         u64 *copy_src0, *copy_dst0;
+         u16 new_l0;
+         tcp_header_t *tcp0;
+         u32 tcp_offset0;
+         ioam_cache_entry_t *entry = 0;
+
+         next0 = IP6_IOAM_CACHE_INPUT_NEXT_IP6_LOOKUP;
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+
+         ip0 = vlib_buffer_get_current (b0);
+         /* non-TCP packets pass through untouched */
+         if (IP_PROTOCOL_TCP !=
+             ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+           {
+             goto TRACE0;
+           }
+         tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+         /* only SYN+ACK and RST are candidates for header re-insertion */
+         if (((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+              (tcp0->flags & TCP_FLAG_ACK) == TCP_FLAG_ACK) ||
+             (tcp0->flags & TCP_FLAG_RST) == TCP_FLAG_RST)
+           {
+             if (0 != (entry = ioam_cache_lookup (ip0,
+                                                  clib_net_to_host_u16
+                                                  (tcp0->src_port),
+                                                  clib_net_to_host_u16
+                                                  (tcp0->dst_port),
+                                                  clib_net_to_host_u32
+                                                  (tcp0->ack_number))))
+               {
+                 rewrite = entry->ioam_rewrite_string;
+                 rewrite_len = vec_len (rewrite);
+               }
+             else
+               {
+                 /* SYN+ACK/RST with no matching cache entry: drop */
+                 next0 = IP6_IOAM_CACHE_INPUT_NEXT_DROP;
+                 goto TRACE0;
+               }
+           }
+         else
+           goto TRACE0;
+
+
+         /* Copy the ip header left by the required amount */
+         copy_dst0 = (u64 *) (((u8 *) ip0) - (rewrite_len + sr_rewrite_len));
+         copy_src0 = (u64 *) ip0;
+
+         /* 5 x u64 = 40 bytes = sizeof (ip6_header_t) */
+         copy_dst0[0] = copy_src0[0];
+         copy_dst0[1] = copy_src0[1];
+         copy_dst0[2] = copy_src0[2];
+         copy_dst0[3] = copy_src0[3];
+         copy_dst0[4] = copy_src0[4];
+         vlib_buffer_advance (b0, -(word) (rewrite_len + sr_rewrite_len));
+         ip0 = vlib_buffer_get_current (b0);
+
+         hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+         srh0 = (ip6_sr_header_t *) ((u8 *) hbh0 + rewrite_len);
+         /* $$$ tune, rewrite_len is a multiple of 8 */
+         clib_memcpy (hbh0, rewrite, rewrite_len);
+         clib_memcpy (srh0, cm->sr_rewrite_template, sr_rewrite_len);
+         /* Copy dst address into the DA slot in the segment list */
+         clib_memcpy (srh0->segments, ip0->dst_address.as_u64,
+                      sizeof (ip6_address_t));
+         /* Rewrite the ip6 dst address with the first hop */
+         clib_memcpy (ip0->dst_address.as_u64, entry->next_hop.as_u64,
+                      sizeof (ip6_address_t));
+         clib_memcpy (&srh0->segments[1],
+                      (u8 *) hbh0 + entry->my_address_offset,
+                      sizeof (ip6_address_t));
+         srh0->segments_left--;
+         /* entry is no longer referenced past this point */
+         ioam_cache_entry_free (entry);
+
+         /* Patch the protocol chain, insert the h-b-h (type 0) header */
+         srh0->protocol = ip0->protocol;
+         hbh0->protocol = IPPROTO_IPV6_ROUTE;
+         ip0->protocol = 0;
+         new_l0 =
+           clib_net_to_host_u16 (ip0->payload_length) + rewrite_len +
+           sr_rewrite_len;
+         ip0->payload_length = clib_host_to_net_u16 (new_l0);
+         processed++;
+       TRACE0:
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             ip6_add_from_cache_hbh_trace_t *t =
+               vlib_add_trace (vm, node, b0, sizeof (*t));
+             t->next_index = next0;
+           }
+
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ip6_add_from_cache_hbh_node.index,
+                              IP6_ADD_FROM_CACHE_HBH_ERROR_PROCESSED,
+                              processed);
+  return frame->n_vectors;
+}
+/* Node registration: classifier steers SYN+ACK/RST responses here */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_add_from_cache_hbh_node) =
+{
+  .function = ip6_add_from_cache_hbh_node_fn,
+  .name = "ip6-add-from-cache-hop-by-hop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_add_from_cache_hbh_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (ip6_add_from_cache_hbh_error_strings),
+  .error_strings =  ip6_add_from_cache_hbh_error_strings,
+  /* See ip/lookup.h */
+  .n_next_nodes = IP6_IOAM_CACHE_INPUT_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [IP6_IOAM_CACHE_INPUT_NEXT_##s] = n,
+    foreach_ip6_ioam_cache_input_next
+#undef _
+  },
+};
+/* *INDENT-ON* */
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_from_cache_hbh_node,
+                             ip6_add_from_cache_hbh_node_fn)
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
diff --git a/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c b/src/plugins/ioam/ip6/ioam_cache_tunnel_select_node.c
new file mode 100644 (file)
index 0000000..3df9871
--- /dev/null
@@ -0,0 +1,770 @@
+/*
+ * Copyright (c) 2017 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ioam_cache_tunnel_select_node.c
+ * This file implements anycast server selection using iOAM data
+ * carried in responses from the candidate servers.
+ * The anycast service is reachable via multiple servers, each reachable
+ * over an SR tunnel.
+ * Works with TCP Anycast application.
+ * A cache entry is created when a TCP SYN is received for the anycast
+ * destination.  Response TCP SYN ACKs for the anycast service are
+ * compared and the selected response is forwarded.
+ * The functionality is introduced via graph nodes that are hooked into
+ * vnet graph via classifier configs like below:
+ *
+ * Enable anycast service selection:
+ * set ioam ip6 sr-tunnel-select oneway
+ *
+ * Enable following classifier on the anycast service client facing interface
+ * e.g. anycast service is db06::06 then:
+ * classify session acl-hit-next ip6-node ip6-add-syn-hop-by-hop table-index 0 match l3
+ * ip6 dst db06::06 ioam-encap anycast
+ *
+ * Enable following classifier on the interfaces facing the server of anycast service:
+ * classify session acl-hit-next ip6-node ip6-lookup table-index 0 match l3
+ *            ip6 src db06::06 ioam-decap anycast
+ *
+ */
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <vnet/sr/sr.h>
+#include <ioam/ip6/ioam_cache.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+#include <vnet/ip/ip6_hop_by_hop_packet.h>
+
+/* Per-packet trace record for the tunnel-select cache node */
+typedef struct
+{
+  u32 next_index;
+  u32 flow_label;
+} cache_ts_trace_t;
+
+/* packet trace format function: prints flow label and next-node index */
+static u8 *
+format_cache_ts_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  cache_ts_trace_t *t = va_arg (*args, cache_ts_trace_t *);
+
+  s = format (s, "CACHE: flow_label %d, next index %d",
+             t->flow_label, t->next_index);
+  return s;
+}
+
+vlib_node_registration_t ioam_cache_ts_node;
+
+/* Counter strings for the ip6-ioam-tunnel-select node */
+#define foreach_cache_ts_error \
+_(RECORDED, "ip6 iOAM headers cached")
+
+typedef enum
+{
+#define _(sym,str) CACHE_TS_ERROR_##sym,
+  foreach_cache_ts_error
+#undef _
+    CACHE_TS_N_ERROR,
+} cache_ts_error_t;
+
+static char *cache_ts_error_strings[] = {
+#define _(sym,string) string,
+  foreach_cache_ts_error
+#undef _
+};
+
+/* Next nodes: selected responses go to ip6-pop-hop-by-hop, losers drop */
+typedef enum
+{
+  IOAM_CACHE_TS_NEXT_POP_HBYH,
+  IOAM_CACHE_TS_ERROR_NEXT_DROP,
+  IOAM_CACHE_TS_N_NEXT,
+} cache_ts_next_t;
+
+/*
+ * Tunnel-select cache node.  For each TCP SYN+ACK response to a tracked
+ * anycast SYN, compares the packet's iOAM hop-by-hop data against the
+ * currently cached best response (ip6_ioam_analyse_compare_path_delay):
+ *  - worse or equal: drop, and flush the cached winner if all responses
+ *    have arrived or the timer expired;
+ *  - better: cache this buffer (skip enqueue) and drop any previous one.
+ * RST responses release the cache entry.  Non-TCP / untracked packets
+ * continue to ip6-pop-hop-by-hop.
+ */
+static uword
+ip6_ioam_cache_ts_node_fn (vlib_main_t * vm,
+                          vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 n_left_from, *from, *to_next;
+  cache_ts_next_t next_index;
+  u32 recorded = 0;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      // TODO: dual loop
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *p0;
+         u32 next0 = IOAM_CACHE_TS_NEXT_POP_HBYH;
+         ip6_header_t *ip0;
+         ip6_hop_by_hop_header_t *hbh0, *hbh_cmp;
+         tcp_header_t *tcp0;
+         u32 tcp_offset0;
+         u32 cache_ts_index = 0;
+         u8 cache_thread_id = 0;
+         int result = 0;
+         int skip = 0;
+
+         bi0 = from[0];
+         from += 1;
+         n_left_from -= 1;
+
+         p0 = vlib_get_buffer (vm, bi0);
+         ip0 = vlib_buffer_get_current (p0);
+         if (IP_PROTOCOL_TCP ==
+             ip6_locate_header (p0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+           {
+             tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+             if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+                 (tcp0->flags & TCP_FLAG_ACK) == TCP_FLAG_ACK)
+               {
+                 /* Look up and compare */
+                 hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+
+                 if (0 == ioam_cache_ts_lookup (ip0,
+                                                hbh0->protocol,
+                                                clib_net_to_host_u16
+                                                (tcp0->src_port),
+                                                clib_net_to_host_u16
+                                                (tcp0->dst_port),
+                                                clib_net_to_host_u32
+                                                (tcp0->ack_number), &hbh_cmp,
+                                                &cache_ts_index,
+                                                &cache_thread_id, 1))
+                   {
+                     /* response seen */
+                     result = -1;
+                     if (hbh_cmp)
+                       result =
+                         ip6_ioam_analyse_compare_path_delay (hbh0, hbh_cmp,
+                                                              cm->criteria_oneway);
+                     if (result >= 0)
+                       {
+                         /* current syn/ack is worse than the earlier: Drop */
+                         next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+                         /* Check if all responses are received or time has exceeded
+                            send cached response if yes */
+                         ioam_cache_ts_check_and_send (cache_thread_id,
+                                                       cache_ts_index);
+                       }
+                     else
+                       {
+                         /* Update cache with this buffer */
+                         /* If successfully updated then skip sending it */
+                         if (0 ==
+                             (result =
+                              ioam_cache_ts_update (cache_thread_id,
+                                                    cache_ts_index, bi0,
+                                                    hbh0)))
+                           {
+                             skip = 1;
+                             /* count the cached header so the RECORDED
+                                counter reflects reality */
+                             recorded++;
+                           }
+                         else
+                           next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+                       }
+                   }
+                 else
+                   {
+                     next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+                   }
+               }
+             else if ((tcp0->flags & TCP_FLAG_RST) == TCP_FLAG_RST)
+               {
+                 /* Look up and compare */
+                 hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+                 if (0 == ioam_cache_ts_lookup (ip0, hbh0->protocol, clib_net_to_host_u16 (tcp0->src_port), clib_net_to_host_u16 (tcp0->dst_port), clib_net_to_host_u32 (tcp0->ack_number), &hbh_cmp, &cache_ts_index, &cache_thread_id, 1))   //response seen
+                   {
+                     next0 = IOAM_CACHE_TS_ERROR_NEXT_DROP;
+                     if (hbh_cmp)
+                       ioam_cache_ts_check_and_send (cache_thread_id,
+                                                     cache_ts_index);
+                   }
+
+               }
+           }
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+           {
+             if (p0->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 cache_ts_trace_t *t =
+                   vlib_add_trace (vm, node, p0, sizeof (*t));
+                 t->flow_label =
+                   clib_net_to_host_u32
+                   (ip0->ip_version_traffic_class_and_flow_label);
+                 t->next_index = next0;
+               }
+           }
+         /* verify speculative enqueue, maybe switch current next frame */
+         if (!skip)
+           {
+             to_next[0] = bi0;
+             to_next += 1;
+             n_left_to_next -= 1;
+             vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                              to_next, n_left_to_next,
+                                              bi0, next0);
+           }
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+  vlib_node_increment_counter (vm, ioam_cache_ts_node.index,
+                              CACHE_TS_ERROR_RECORDED, recorded);
+  return frame->n_vectors;
+}
+
+/*
+ * Node registration for IP6 iOAM tunnel selection; winners continue to
+ * ip6-pop-hop-by-hop, rejected responses drop.
+ */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_ts_node) =
+{
+  .function = ip6_ioam_cache_ts_node_fn,
+  .name = "ip6-ioam-tunnel-select",
+  .vector_size = sizeof (u32),
+  .format_trace = format_cache_ts_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (cache_ts_error_strings),
+  .error_strings = cache_ts_error_strings,
+  .n_next_nodes = IOAM_CACHE_TS_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes =
+  {
+    [IOAM_CACHE_TS_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop",
+    [IOAM_CACHE_TS_ERROR_NEXT_DROP] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/* Per-packet trace record for the ip6-add-syn-hop-by-hop node */
+typedef struct
+{
+  u32 next_index;
+} ip6_reset_ts_hbh_trace_t;
+
+/* packet trace format function: prints the chosen next-node index */
+static u8 *
+format_ip6_reset_ts_hbh_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ip6_reset_ts_hbh_trace_t *t = va_arg (*args,
+                                       ip6_reset_ts_hbh_trace_t *);
+
+  s =
+    format (s, "IP6_IOAM_RESET_TUNNEL_SELECT_HBH: next index %d",
+           t->next_index);
+  return s;
+}
+
+vlib_node_registration_t ip6_reset_ts_hbh_node;
+
+/* Counter strings for the ip6-add-syn-hop-by-hop node */
+#define foreach_ip6_reset_ts_hbh_error \
+_(PROCESSED, "iOAM Syn/Ack Pkts processed") \
+_(SAVED, "iOAM Syn Pkts state saved") \
+_(REMOVED, "iOAM Syn/Ack Pkts state removed")
+
+typedef enum
+{
+#define _(sym,str) IP6_RESET_TS_HBH_ERROR_##sym,
+  foreach_ip6_reset_ts_hbh_error
+#undef _
+    IP6_RESET_TS_HBH_N_ERROR,
+} ip6_reset_ts_hbh_error_t;
+
+static char *ip6_reset_ts_hbh_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ip6_reset_ts_hbh_error
+#undef _
+};
+
+/* Next nodes: rewritten SYNs continue to ip6-lookup, failures drop */
+#define foreach_ip6_ioam_cache_ts_input_next    \
+  _(IP6_LOOKUP, "ip6-lookup")                   \
+  _(DROP, "error-drop")
+
+typedef enum
+{
+#define _(s,n) IP6_IOAM_CACHE_TS_INPUT_NEXT_##s,
+  foreach_ip6_ioam_cache_ts_input_next
+#undef _
+    IP6_IOAM_CACHE_TS_INPUT_N_NEXT,
+} ip6_ioam_cache_ts_input_next_t;
+
+
+/*
+ * For each outgoing TCP SYN (SYN set, ACK clear), creates a tunnel-select
+ * cache entry (keyed by seq_number + 1, the expected SYN+ACK ack number)
+ * and prepends the pre-built iOAM hop-by-hop rewrite from cm->rewrite,
+ * embedding the pool id/index in the e2e option so responses can be
+ * matched.  Non-TCP and non-SYN packets pass through unchanged.
+ */
+static uword
+ip6_reset_ts_hbh_node_fn (vlib_main_t * vm,
+                         vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 n_left_from, *from, *to_next;
+  ip_lookup_next_t next_index;
+  u32 processed = 0, cache_ts_added = 0;
+  u64 now;
+  u8 *rewrite = cm->rewrite;
+  u32 rewrite_length = vec_len (rewrite);
+  ioam_e2e_cache_option_t *e2e = 0;
+  u8 no_of_responses = cm->wait_for_responses;
+
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+
+      now = vlib_time_now (vm);
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+       {
+         u32 bi0, bi1;
+         vlib_buffer_t *b0, *b1;
+         u32 next0, next1;
+         ip6_header_t *ip0, *ip1;
+         tcp_header_t *tcp0, *tcp1;
+         u32 tcp_offset0, tcp_offset1;
+         ip6_hop_by_hop_header_t *hbh0, *hbh1;
+         u64 *copy_src0, *copy_dst0, *copy_src1, *copy_dst1;
+         u16 new_l0, new_l1;
+         u32 pool_index0 = 0, pool_index1 = 0;
+
+         next0 = next1 = IP6_IOAM_CACHE_TS_INPUT_NEXT_IP6_LOOKUP;
+         /* Prefetch next iteration. */
+         {
+           vlib_buffer_t *p2, *p3;
+
+           p2 = vlib_get_buffer (vm, from[2]);
+           p3 = vlib_get_buffer (vm, from[3]);
+
+           vlib_prefetch_buffer_header (p2, LOAD);
+           vlib_prefetch_buffer_header (p3, LOAD);
+           CLIB_PREFETCH (p2->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+           CLIB_PREFETCH (p3->data, 2 * CLIB_CACHE_LINE_BYTES, LOAD);
+         }
+
+
+         /* speculatively enqueue b0 to the current next frame */
+         to_next[0] = bi0 = from[0];
+         to_next[1] = bi1 = from[1];
+         from += 2;
+         to_next += 2;
+         n_left_from -= 2;
+         n_left_to_next -= 2;
+
+         b0 = vlib_get_buffer (vm, bi0);
+         b1 = vlib_get_buffer (vm, bi1);
+
+         ip0 = vlib_buffer_get_current (b0);
+         ip1 = vlib_buffer_get_current (b1);
+
+         if (IP_PROTOCOL_TCP !=
+             ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+           {
+             goto NEXT00;
+           }
+         tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+         /* initial SYN only: SYN set, ACK clear */
+         if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+             (tcp0->flags & TCP_FLAG_ACK) == 0)
+           {
+             if (no_of_responses > 0)
+               {
+                 /* Create TS select entry */
+                 if (0 == ioam_cache_ts_add (ip0,
+                                             clib_net_to_host_u16
+                                             (tcp0->src_port),
+                                             clib_net_to_host_u16
+                                             (tcp0->dst_port),
+                                             clib_net_to_host_u32
+                                             (tcp0->seq_number) + 1,
+                                             no_of_responses, now,
+                                             vm->cpu_index, &pool_index0))
+                   {
+                     cache_ts_added++;
+                   }
+               }
+             copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+             copy_src0 = (u64 *) ip0;
+
+             /* 5 x u64 = 40 bytes = sizeof (ip6_header_t) */
+             copy_dst0[0] = copy_src0[0];
+             copy_dst0[1] = copy_src0[1];
+             copy_dst0[2] = copy_src0[2];
+             copy_dst0[3] = copy_src0[3];
+             copy_dst0[4] = copy_src0[4];
+
+             vlib_buffer_advance (b0, -(word) rewrite_length);
+             ip0 = vlib_buffer_get_current (b0);
+
+             hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+             /* $$$ tune, rewrite_length is a multiple of 8 */
+             clib_memcpy (hbh0, rewrite, rewrite_length);
+             e2e =
+               (ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
+                                            cm->rewrite_pool_index_offset);
+             e2e->pool_id = (u8) vm->cpu_index;
+             e2e->pool_index = pool_index0;
+             ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+                                          ((u8 *) e2e +
+                                           sizeof (ioam_e2e_cache_option_t)),
+                                          b0);
+             /* Patch the protocol chain, insert the h-b-h (type 0) header */
+             hbh0->protocol = ip0->protocol;
+             ip0->protocol = 0;
+             new_l0 =
+               clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+             ip0->payload_length = clib_host_to_net_u16 (new_l0);
+             processed++;
+           }
+
+       NEXT00:
+         if (IP_PROTOCOL_TCP !=
+             ip6_locate_header (b1, ip1, IP_PROTOCOL_TCP, &tcp_offset1))
+           {
+             goto TRACE00;
+           }
+         tcp1 = (tcp_header_t *) ((u8 *) ip1 + tcp_offset1);
+         if ((tcp1->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+             (tcp1->flags & TCP_FLAG_ACK) == 0)
+           {
+             if (no_of_responses > 0)
+               {
+                 /* Create TS select entry */
+                 if (0 == ioam_cache_ts_add (ip1,
+                                             clib_net_to_host_u16
+                                             (tcp1->src_port),
+                                             clib_net_to_host_u16
+                                             (tcp1->dst_port),
+                                             clib_net_to_host_u32
+                                             (tcp1->seq_number) + 1,
+                                             no_of_responses, now,
+                                             vm->cpu_index, &pool_index1))
+                   {
+                     cache_ts_added++;
+                   }
+               }
+
+             copy_dst1 = (u64 *) (((u8 *) ip1) - rewrite_length);
+             copy_src1 = (u64 *) ip1;
+
+             copy_dst1[0] = copy_src1[0];
+             copy_dst1[1] = copy_src1[1];
+             copy_dst1[2] = copy_src1[2];
+             copy_dst1[3] = copy_src1[3];
+             copy_dst1[4] = copy_src1[4];
+
+             vlib_buffer_advance (b1, -(word) rewrite_length);
+             ip1 = vlib_buffer_get_current (b1);
+
+             hbh1 = (ip6_hop_by_hop_header_t *) (ip1 + 1);
+             /* $$$ tune, rewrite_length is a multiple of 8 */
+             clib_memcpy (hbh1, rewrite, rewrite_length);
+             e2e =
+               (ioam_e2e_cache_option_t *) ((u8 *) hbh1 +
+                                            cm->rewrite_pool_index_offset);
+             e2e->pool_id = (u8) vm->cpu_index;
+             e2e->pool_index = pool_index1;
+             ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+                                          ((u8 *) e2e +
+                                           sizeof (ioam_e2e_cache_option_t)),
+                                          b1);
+             /* Patch the protocol chain, insert the h-b-h (type 0) header */
+             hbh1->protocol = ip1->protocol;
+             ip1->protocol = 0;
+             new_l1 =
+               clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
+             ip1->payload_length = clib_host_to_net_u16 (new_l1);
+             processed++;
+           }
+
+       TRACE00:
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+           {
+             if (b0->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ip6_reset_ts_hbh_trace_t *t =
+                   vlib_add_trace (vm, node, b0, sizeof (*t));
+                 t->next_index = next0;
+               }
+             if (b1->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ip6_reset_ts_hbh_trace_t *t =
+                   vlib_add_trace (vm, node, b1, sizeof (*t));
+                 t->next_index = next1;
+               }
+
+           }
+
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, bi1, next0, next1);
+       }
+      /* single-buffer loop handles the frame remainder */
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *b0;
+         u32 next0;
+         ip6_header_t *ip0;
+         tcp_header_t *tcp0;
+         u32 tcp_offset0;
+         ip6_hop_by_hop_header_t *hbh0;
+         u64 *copy_src0, *copy_dst0;
+         u16 new_l0;
+         u32 pool_index0 = 0;
+
+         next0 = IP6_IOAM_CACHE_TS_INPUT_NEXT_IP6_LOOKUP;
+         /* speculatively enqueue b0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         b0 = vlib_get_buffer (vm, bi0);
+
+         ip0 = vlib_buffer_get_current (b0);
+         if (IP_PROTOCOL_TCP !=
+             ip6_locate_header (b0, ip0, IP_PROTOCOL_TCP, &tcp_offset0))
+           {
+             goto TRACE0;
+           }
+         tcp0 = (tcp_header_t *) ((u8 *) ip0 + tcp_offset0);
+         if ((tcp0->flags & TCP_FLAG_SYN) == TCP_FLAG_SYN &&
+             (tcp0->flags & TCP_FLAG_ACK) == 0)
+           {
+             if (no_of_responses > 0)
+               {
+                 /* Create TS select entry */
+                 if (0 == ioam_cache_ts_add (ip0,
+                                             clib_net_to_host_u16
+                                             (tcp0->src_port),
+                                             clib_net_to_host_u16
+                                             (tcp0->dst_port),
+                                             clib_net_to_host_u32
+                                             (tcp0->seq_number) + 1,
+                                             no_of_responses, now,
+                                             vm->cpu_index, &pool_index0))
+                   {
+                     cache_ts_added++;
+                   }
+               }
+             copy_dst0 = (u64 *) (((u8 *) ip0) - rewrite_length);
+             copy_src0 = (u64 *) ip0;
+
+             copy_dst0[0] = copy_src0[0];
+             copy_dst0[1] = copy_src0[1];
+             copy_dst0[2] = copy_src0[2];
+             copy_dst0[3] = copy_src0[3];
+             copy_dst0[4] = copy_src0[4];
+
+             vlib_buffer_advance (b0, -(word) rewrite_length);
+             ip0 = vlib_buffer_get_current (b0);
+
+             hbh0 = (ip6_hop_by_hop_header_t *) (ip0 + 1);
+             /* $$$ tune, rewrite_length is a multiple of 8 */
+             clib_memcpy (hbh0, rewrite, rewrite_length);
+             e2e =
+               (ioam_e2e_cache_option_t *) ((u8 *) hbh0 +
+                                            cm->rewrite_pool_index_offset);
+             e2e->pool_id = (u8) vm->cpu_index;
+             e2e->pool_index = pool_index0;
+             ioam_e2e_id_rewrite_handler ((ioam_e2e_id_option_t *)
+                                          ((u8 *) e2e +
+                                           sizeof (ioam_e2e_cache_option_t)),
+                                          b0);
+             /* Patch the protocol chain, insert the h-b-h (type 0) header */
+             hbh0->protocol = ip0->protocol;
+             ip0->protocol = 0;
+             new_l0 =
+               clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+             ip0->payload_length = clib_host_to_net_u16 (new_l0);
+             processed++;
+           }
+       TRACE0:
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (b0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             ip6_reset_ts_hbh_trace_t *t =
+               vlib_add_trace (vm, node, b0, sizeof (*t));
+             t->next_index = next0;
+           }
+
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, ip6_reset_ts_hbh_node.index,
+                              IP6_RESET_TS_HBH_ERROR_PROCESSED, processed);
+  vlib_node_increment_counter (vm, ip6_reset_ts_hbh_node.index,
+                              IP6_RESET_TS_HBH_ERROR_SAVED, cache_ts_added);
+
+  return frame->n_vectors;
+}
+
+/* Node registration: classifier steers outgoing anycast SYNs here */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ip6_reset_ts_hbh_node) =
+{
+  .function = ip6_reset_ts_hbh_node_fn,
+  .name = "ip6-add-syn-hop-by-hop",
+  .vector_size = sizeof (u32),
+  .format_trace = format_ip6_reset_ts_hbh_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (ip6_reset_ts_hbh_error_strings),
+  .error_strings =  ip6_reset_ts_hbh_error_strings,
+  /* See ip/lookup.h */
+  .n_next_nodes = IP6_IOAM_CACHE_TS_INPUT_N_NEXT,
+  .next_nodes =
+  {
+#define _(s,n) [IP6_IOAM_CACHE_TS_INPUT_NEXT_##s] = n,
+    foreach_ip6_ioam_cache_ts_input_next
+#undef _
+  },
+};
+
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_reset_ts_hbh_node, ip6_reset_ts_hbh_node_fn)
+/* *INDENT-ON* */
+
+vlib_node_registration_t ioam_cache_ts_timer_tick_node;
+
+/* Trace record for the timer-tick input node */
+typedef struct
+{
+  u32 thread_index;
+} ioam_cache_ts_timer_tick_trace_t;
+
+/* packet trace format function: prints the servicing thread index */
+static u8 *
+format_ioam_cache_ts_timer_tick_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  ioam_cache_ts_timer_tick_trace_t *t =
+    va_arg (*args, ioam_cache_ts_timer_tick_trace_t *);
+
+  s = format (s, "IOAM_CACHE_TS_TIMER_TICK: thread index %d",
+             t->thread_index);
+  return s;
+}
+
+/* Counter strings for the timer-tick node */
+#define foreach_ioam_cache_ts_timer_tick_error                 \
+  _(TIMER, "Timer events")
+
+typedef enum
+{
+#define _(sym,str) IOAM_CACHE_TS_TIMER_TICK_ERROR_##sym,
+  foreach_ioam_cache_ts_timer_tick_error
+#undef _
+    IOAM_CACHE_TS_TIMER_TICK_N_ERROR,
+} ioam_cache_ts_timer_tick_error_t;
+
+static char *ioam_cache_ts_timer_tick_error_strings[] = {
+#define _(sym,string) string,
+  foreach_ioam_cache_ts_timer_tick_error
+#undef _
+};
+
+/*
+ * Toggle the ioam-cache-ts-timer-tick input node: polling when the
+ * tunnel-select feature is enabled, disabled otherwise.
+ */
+void
+ioam_cache_ts_timer_node_enable (vlib_main_t * vm, u8 enable)
+{
+  vlib_node_state_t state;
+
+  state = enable ? VLIB_NODE_STATE_POLLING : VLIB_NODE_STATE_DISABLED;
+  vlib_node_set_state (vm, ioam_cache_ts_timer_tick_node.index, state);
+}
+
+/*
+ * Timer-wheel expiry callback: flushes the best cached response for
+ * every expired tunnel-select entry on the calling thread.
+ */
+void
+expired_cache_ts_timer_callback (u32 * expired_timers)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  int i;
+  u32 pool_index;
+  u32 thread_index = os_get_cpu_number ();
+  u32 count = 0;
+
+  for (i = 0; i < vec_len (expired_timers); i++)
+    {
+      /* Get pool index and pool id */
+      /* NOTE(review): only the low 28 bits (pool index) are extracted;
+         the high nibble presumably encodes the pool id per the timer
+         handle layout — confirm it is intentionally unused here */
+      pool_index = expired_timers[i] & 0x0FFFFFFF;
+
+      /* Handle expiration */
+      ioam_cache_ts_send (thread_index, pool_index);
+      count++;
+    }
+  vlib_node_increment_counter (cm->vlib_main,
+                              ioam_cache_ts_timer_tick_node.index,
+                              IOAM_CACHE_TS_TIMER_TICK_ERROR_TIMER, count);
+}
+
+/*
+ * Polling input node: advances this thread's timer wheel (which fires
+ * expired_cache_ts_timer_callback), then sleeps IOAM_CACHE_TS_TICK ms.
+ * NOTE(review): nanosleep blocks the graph dispatch loop of this thread
+ * for the tick interval on every invocation — confirm this pacing is
+ * intended rather than using a process node / timer event.
+ */
+static uword
+ioam_cache_ts_timer_tick_node_fn (vlib_main_t * vm,
+                                 vlib_node_runtime_t * node,
+                                 vlib_frame_t * f)
+{
+  ioam_cache_main_t *cm = &ioam_cache_main;
+  u32 my_thread_index = os_get_cpu_number ();
+  struct timespec ts, tsrem;
+
+  tw_timer_expire_timers_16t_2w_512sl (&cm->timer_wheels[my_thread_index],
+                                      vlib_time_now (vm));
+  ts.tv_sec = 0;
+  ts.tv_nsec = 1000 * 1000 * IOAM_CACHE_TS_TICK;
+  /* restart nanosleep with the remaining time if interrupted by a signal */
+  while (nanosleep (&ts, &tsrem) < 0)
+    {
+      ts = tsrem;
+    }
+
+  return 0;
+}
+/* Timer-tick node registration; starts disabled, enabled on demand via
+   ioam_cache_ts_timer_node_enable() */
+/* *INDENT-OFF* */
+VLIB_REGISTER_NODE (ioam_cache_ts_timer_tick_node) = {
+  .function = ioam_cache_ts_timer_tick_node_fn,
+  .name = "ioam-cache-ts-timer-tick",
+  .format_trace = format_ioam_cache_ts_timer_tick_trace,
+  .type = VLIB_NODE_TYPE_INPUT,
+
+  .n_errors = ARRAY_LEN(ioam_cache_ts_timer_tick_error_strings),
+  .error_strings = ioam_cache_ts_timer_tick_error_strings,
+
+  .n_next_nodes = 1,
+
+  .state = VLIB_NODE_STATE_DISABLED,
+
+  /* edit / add dispositions here */
+  .next_nodes = {
+    [0] = "error-drop",
+  },
+};
+/* *INDENT-ON* */
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */
index 60802ad..7065b41 100644 (file)
@@ -213,8 +213,6 @@ typedef struct
   u32 app_data;
 } ioam_trace_ts_app_t;
 
-
-
 static inline u8
 fetch_trace_data_size (u8 trace_type)
 {
index f258b75..9dfb812 100644 (file)
@@ -187,10 +187,15 @@ typedef enum fib_entry_attribute_t_ {
      * The prefix/address is local to this device
      */
     FIB_ENTRY_ATTRIBUTE_LOCAL,
+    /**
+     * The prefix/address exempted from loose uRPF check
+     * To be used with caution
+     */
+    FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT,
     /**
      * Marker. add new entries before this one.
      */
-    FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_LOCAL,
+    FIB_ENTRY_ATTRIBUTE_LAST = FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT,
 } fib_entry_attribute_t;
 
 /**
@@ -205,6 +210,7 @@ typedef enum fib_entry_attribute_t_ {
     [FIB_ENTRY_ATTRIBUTE_DROP]      = "drop",          \
     [FIB_ENTRY_ATTRIBUTE_EXCLUSIVE] = "exclusive",      \
     [FIB_ENTRY_ATTRIBUTE_LOCAL]     = "local",         \
+    [FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT] = "uRPF-exempt"   \
 }
 
 #define FOR_EACH_FIB_ATTRIBUTE(_item)                  \
@@ -220,6 +226,7 @@ typedef enum fib_entry_flag_t_ {
     FIB_ENTRY_FLAG_EXCLUSIVE = (1 << FIB_ENTRY_ATTRIBUTE_EXCLUSIVE),
     FIB_ENTRY_FLAG_LOCAL     = (1 << FIB_ENTRY_ATTRIBUTE_LOCAL),
     FIB_ENTRY_FLAG_IMPORT    = (1 << FIB_ENTRY_ATTRIBUTE_IMPORT),
+    FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT = (1 << FIB_ENTRY_ATTRIBUTE_URPF_EXEMPT),
 } __attribute__((packed)) fib_entry_flag_t;
 
 /**
index 6f5b7fe..feb232d 100644 (file)
@@ -446,8 +446,9 @@ fib_entry_src_mk_lb (fib_entry_t *fib_entry,
      */
     index_t ui = fib_path_list_get_urpf(esrc->fes_pl);
 
-    if (fib_entry_is_sourced(fib_entry_get_index(fib_entry),
-                            FIB_SOURCE_URPF_EXEMPT) &&
+    if ((fib_entry_is_sourced(fib_entry_get_index(fib_entry),
+                             FIB_SOURCE_URPF_EXEMPT) ||
+        (esrc->fes_entry_flags & FIB_ENTRY_FLAG_LOOSE_URPF_EXEMPT))&&
        (0 == fib_urpf_check_size(ui)))
     {
        /*
index 2615fbf..cf40fbb 100644 (file)
@@ -474,6 +474,84 @@ ip6_compute_flow_hash (const ip6_header_t * ip,
   return (u32) c;
 }
 
+/* ip6_locate_header
+ *
+ * Walk the IPv6 extension-header chain of the packet in p0/ip0 looking
+ * for the header whose protocol number is find_hdr_type.  Used to
+ * locate a specific IPv6 extension header or the transport-layer header.
+ *   1. If find_hdr_type < 0, the walk stops at the first
+ *      non-extension header (transport or ESP); its protocol number is
+ *      returned and its offset from the start of the IPv6 header is
+ *      stored in *offset.
+ *   2. If a header matching find_hdr_type (>= 0; 0 is valid and means
+ *      hop-by-hop) is found, its offset is stored in *offset and its
+ *      protocol number is returned.
+ *   3. Returns -1 if find_hdr_type is not found, the packet is
+ *      malformed (a header starts beyond the buffer), or the packet is
+ *      a non-first fragment.
+ */
+always_inline int
+ip6_locate_header (vlib_buffer_t * p0,
+                  ip6_header_t * ip0, int find_hdr_type, u32 * offset)
+{
+  u8 next_proto = ip0->protocol;
+  u8 *next_header;
+  u8 done = 0;
+  u32 cur_offset;
+  u8 *temp_nxthdr = 0;
+  u32 exthdr_len = 0;
+
+  next_header = ip6_next_header (ip0);
+  cur_offset = sizeof (ip6_header_t);
+  while (1)
+    {
+      done = (next_proto == find_hdr_type);
+      if (PREDICT_FALSE
+         (next_header >=
+          (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
+       {
+         //A malicious packet could set an extension header with a too big size
+         /* NOTE(review): this only checks that the header STARTS inside
+          * the buffer; the length field read below may still describe
+          * bytes past current_length -- confirm callers pull complete
+          * headers into the first buffer. */
+         return (-1);
+       }
+      if (done)
+       break;
+      if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
+       {
+         /* Reached transport/ESP (or no-next-header): success only when
+          * the caller asked for "first non-extension header". */
+         if (find_hdr_type < 0)
+           break;
+         return -1;
+       }
+      if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
+       {
+         ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
+         u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
+         /* Non first fragment return -1 */
+         if (frag_off)
+           return (-1);
+         /* Fragment header has a fixed size; no length field. */
+         exthdr_len = sizeof (ip6_frag_hdr_t);
+         temp_nxthdr = next_header + exthdr_len;
+       }
+      else if (next_proto == IP_PROTOCOL_IPSEC_AH)
+       {
+         /* AH length field is in 4-octet units, unlike other ext hdrs. */
+         exthdr_len =
+           ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
+         temp_nxthdr = next_header + exthdr_len;
+       }
+      else
+       {
+         exthdr_len =
+           ip6_ext_header_len (((ip6_ext_header_t *) next_header));
+         temp_nxthdr = next_header + exthdr_len;
+       }
+      next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
+      next_header = temp_nxthdr;
+      cur_offset += exthdr_len;
+    }
+
+  *offset = cur_offset;
+  return (next_proto);
+}
+
+u8 *format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args);
 /*
  * Hop-by-Hop handling
  */
index 6f77c6d..2388a30 100644 (file)
@@ -1236,80 +1236,6 @@ ip6_tcp_udp_icmp_validate_checksum (vlib_main_t * vm, vlib_buffer_t * p0)
   return p0->flags;
 }
 
-/* ip6_locate_header
- *
- * This function is to search for the header specified by the find_hdr number.
- *   1. If the find_hdr < 0 then it finds and returns the protocol number and
- *   offset stored in *offset of the transport or ESP header in the chain if
- *   found.
- *   2. If a header with find_hdr > 0 protocol number is found then the
- *      offset is stored in *offset and protocol number of the header is
- *      returned.
- *   3. If find_hdr header is not found or packet is malformed or
- *      it is a non-first fragment -1 is returned.
- */
-always_inline int
-ip6_locate_header (vlib_buffer_t * p0,
-                  ip6_header_t * ip0, int find_hdr, u32 * offset)
-{
-  u8 next_proto = ip0->protocol;
-  u8 *next_header;
-  u8 done = 0;
-  u32 cur_offset;
-  u8 *temp_nxthdr = 0;
-  u32 exthdr_len = 0;
-
-  next_header = ip6_next_header (ip0);
-  cur_offset = sizeof (ip6_header_t);
-  while (1)
-    {
-      done = (next_proto == find_hdr);
-      if (PREDICT_FALSE
-         (next_header >=
-          (u8 *) vlib_buffer_get_current (p0) + p0->current_length))
-       {
-         //A malicious packet could set an extension header with a too big size
-         return (-1);
-       }
-      if (done)
-       break;
-      if ((!ip6_ext_hdr (next_proto)) || next_proto == IP_PROTOCOL_IP6_NONXT)
-       {
-         if (find_hdr < 0)
-           break;
-         return -1;
-       }
-      if (next_proto == IP_PROTOCOL_IPV6_FRAGMENTATION)
-       {
-         ip6_frag_hdr_t *frag_hdr = (ip6_frag_hdr_t *) next_header;
-         u16 frag_off = ip6_frag_hdr_offset (frag_hdr);
-         /* Non first fragment return -1 */
-         if (frag_off)
-           return (-1);
-         exthdr_len = sizeof (ip6_frag_hdr_t);
-         temp_nxthdr = next_header + exthdr_len;
-       }
-      else if (next_proto == IP_PROTOCOL_IPSEC_AH)
-       {
-         exthdr_len =
-           ip6_ext_authhdr_len (((ip6_ext_header_t *) next_header));
-         temp_nxthdr = next_header + exthdr_len;
-       }
-      else
-       {
-         exthdr_len =
-           ip6_ext_header_len (((ip6_ext_header_t *) next_header));
-         temp_nxthdr = next_header + exthdr_len;
-       }
-      next_proto = ((ip6_ext_header_t *) next_header)->next_hdr;
-      next_header = temp_nxthdr;
-      cur_offset += exthdr_len;
-    }
-
-  *offset = cur_offset;
-  return (next_proto);
-}
-
 /**
  * @brief returns number of links on which src is reachable.
  */
@@ -2413,6 +2339,50 @@ static char *ip6_hop_by_hop_error_strings[] = {
 #undef _
 };
 
+/*
+ * Format an IPv6 hop-by-hop extension header (and its options) for
+ * display.  Walks the option TLVs up to total_len bytes past hbh0,
+ * dispatching each option to a registered per-type trace formatter
+ * (hm->trace[type]) when one exists, otherwise printing it as
+ * unrecognized.  Returns the extended format string.
+ */
+u8 *
+format_ip6_hop_by_hop_ext_hdr (u8 * s, va_list * args)
+{
+  ip6_hop_by_hop_header_t *hbh0 = va_arg (*args, ip6_hop_by_hop_header_t *);
+  int total_len = va_arg (*args, int);
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  u8 type0;
+
+  s = format (s, "IP6_HOP_BY_HOP: next protocol %d len %d total %d",
+             hbh0->protocol, (hbh0->length + 1) << 3, total_len);
+
+  opt0 = (ip6_hop_by_hop_option_t *) (hbh0 + 1);
+  limit0 = (ip6_hop_by_hop_option_t *) ((u8 *) hbh0 + total_len);
+
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      switch (type0)
+       {
+       case 0:         /* Pad1: lone zero octet, no length field; skip 1 byte */
+         opt0 = (ip6_hop_by_hop_option_t *) ((u8 *) opt0 + 1);
+         break;
+
+       default:
+         if (hm->trace[type0])
+           {
+             s = (*hm->trace[type0]) (s, opt0);
+           }
+         else
+           {
+             s =
+               format (s, "\n    unrecognized option %d length %d", type0,
+                       opt0->length);
+           }
+         /* Advance past the TLV: type + length octets plus payload. */
+         opt0 =
+           (ip6_hop_by_hop_option_t *) (((u8 *) opt0) + opt0->length +
+                                        sizeof (ip6_hop_by_hop_option_t));
+         break;
+       }
+    }
+  return s;
+}
+
 static u8 *
 format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
 {
index eb781e4..1545a84 100755 (executable)
@@ -83,7 +83,6 @@ typedef struct
   ip6_address_t bsid;                  /**< BindingSID (key) */
 
   u8 type;                                     /**< Type (default is 0) */
-
   /* SR Policy specific DPO                                                                               */
   /* IF Type = DEFAULT Then Load Balancer DPO among SID lists     */
   /* IF Type = SPRAY then Spray DPO with all SID lists                    */
@@ -290,6 +289,45 @@ sr_steering_policy (int is_del, ip6_address_t * bsid, u32 sr_policy_index,
                    u32 table_id, ip46_address_t * prefix, u32 mask_width,
                    u32 sw_if_index, u8 traffic_type);
 
+/**
+ * @brief SR rewrite string computation for SRH insertion (inline)
+ *
+ * Builds a Segment Routing Header containing vec_len(sl)+1 segment
+ * slots.  The supplied segment list is copied in reverse order (sl[0]
+ * lands at the highest index), and segments[0] is left zero-filled --
+ * presumably reserved for the original destination address at insertion
+ * time; confirm against the caller.
+ *
+ * @param sl is a vector of IPv6 addresses composing the Segment List
+ *
+ * @return precomputed rewrite string for SRH insertion; vec-allocated,
+ *         ownership passes to the caller (caller frees with vec_free)
+ */
+static inline u8 *
+ip6_compute_rewrite_string_insert (ip6_address_t * sl)
+{
+  ip6_sr_header_t *srh;
+  ip6_address_t *addrp, *this_address;
+  u32 header_length = 0;
+  u8 *rs = NULL;
+
+  /* Fixed SRH plus one address slot per segment, plus one extra slot. */
+  header_length = 0;
+  header_length += sizeof (ip6_sr_header_t);
+  header_length += (vec_len (sl) + 1) * sizeof (ip6_address_t);
+
+  /* vec_validate zero-fills, so unwritten fields/slots start as 0. */
+  vec_validate (rs, header_length - 1);
+
+  srh = (ip6_sr_header_t *) rs;
+  srh->type = ROUTING_HEADER_TYPE_SR;
+  srh->segments_left = vec_len (sl);
+  srh->first_segment = vec_len (sl);
+  /* Hdr Ext Len: length in 8-octet units, not counting the first 8. */
+  srh->length = ((sizeof (ip6_sr_header_t) +
+                 ((vec_len (sl) + 1) * sizeof (ip6_address_t))) / 8) - 1;
+  srh->flags = 0x00;
+  srh->reserved = 0x0000;
+  /* Copy the segment list in reverse: walk sl forward while the
+   * destination pointer moves from the last slot toward segments[1]. */
+  addrp = srh->segments + vec_len (sl);
+  vec_foreach (this_address, sl)
+  {
+    clib_memcpy (addrp->as_u8, this_address->as_u8, sizeof (ip6_address_t));
+    addrp--;
+  }
+  return rs;
+}
+
+
 #endif /* included_vnet_sr_h */
 
 /*