Raw export of IP6 packet with iOAM metadata 67/2267/11
authorShwetha <shwethab@cisco.com>
Mon, 8 Aug 2016 14:51:04 +0000 (15:51 +0100)
committerDave Barach <openvpp@barachs.net>
Mon, 22 Aug 2016 17:19:04 +0000 (17:19 +0000)
- Export is enabled on iOAM pop nodes with "set ioam export ipfix collector"
- ioam_export_node hooks into the vlib graph between the ip6-hop-by-hop and ip6-pop-hop-by-hop nodes
- A buffer per worker thread is created for collecting packet data to be exported
- ioam_export_node exports the first 3 cache lines of each packet by collecting them in an MTU-sized frame and prepending an IPFIX header for export
- ioam_export_thread process node checks for record buffers that have been left unsent for longer than 20 seconds and exports them
- Added dual loop and prefetch in add, hop-by-hop and pop functions

To be done:
- IPFIX template
- Multi collector distribution of ipfix packets
- Port to be configurable

Change-Id: I959b4253036551382562bdaf10a83fd6f2f1c88b
Signed-off-by: Shwetha <shwethab@cisco.com>
12 files changed:
plugins/ioam-plugin/Makefile.am
plugins/ioam-plugin/ioam/export/ioam_export.api [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/ioam_export.c [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/ioam_export.h [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/ioam_export_all_api_h.h [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/ioam_export_msg_enum.h [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/ioam_export_test.c [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/ioam_export_thread.c [new file with mode: 0644]
plugins/ioam-plugin/ioam/export/node.c [new file with mode: 0644]
vnet/vnet/ip/ip6.h
vnet/vnet/ip/ip6_forward.c
vnet/vnet/ip/ip6_hop_by_hop.c

index 47ff116..2ea29e0 100644 (file)
@@ -54,6 +54,30 @@ vpppluginsdir = ${libdir}/vpp_plugins
 vppapitestplugins_LTLIBRARIES = ioam_pot_test_plugin.la
 vppplugins_LTLIBRARIES = ioam_pot_plugin.la
 
+########################################
+# iOAM trace export
+########################################
+
+ioam_export_plugin_la_SOURCES =   \
+ioam/export/ioam_export.c         \
+ioam/export/node.c                \
+ioam/export/ioam_export.api.h     \
+ioam/export/ioam_export_thread.c
+
+BUILT_SOURCES += ioam/export/ioam_export.api.h
+
+noinst_HEADERS +=                       \
+  ioam/export/ioam_export_all_api_h.h   \
+  ioam/export/ioam_export_msg_enum.h    \
+  ioam/export/ioam_export.api.h
+
+ioam_export_test_plugin_la_SOURCES =  \
+  ioam/export/ioam_export_test.c      \
+  ioam/export/ioam_export.api.h
+
+vppapitestplugins_LTLIBRARIES += ioam_export_test_plugin.la
+vppplugins_LTLIBRARIES += ioam_export_plugin.la
+
 
 # Remove *.la files
 install-data-hook:
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export.api b/plugins/ioam-plugin/ioam/export/ioam_export.api
new file mode 100644 (file)
index 0000000..f22d9fc
--- /dev/null
@@ -0,0 +1,42 @@
+/* Hey Emacs use -*- mode: C -*- */
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* Define a simple binary API to control the feature */
+
+define ioam_export_ip6_enable_disable {
+    /* Client identifier, set from api_main.my_client_index */
+    u32 client_index;
+
+    /* Arbitrary context, so client can match reply to request */
+    u32 context;
+
+    /* Enable / disable the feature: zero enables, non-zero disables */
+    u8 is_disable;
+
+    /* Collector ip address */
+    u8 collector_address[4];
+    u8 src_address[4];  /* Src ip address */
+
+    /* Both addresses are 4 bytes each; the handler reads them as ip4_address_t */
+};
+
+define ioam_export_ip6_enable_disable_reply {
+    /* Context copied from the request, so the client can match the reply */
+    u32 context;
+
+    /* Return value, zero means all OK */
+    i32 retval;
+};
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export.c b/plugins/ioam-plugin/ioam/export/ioam_export.c
new file mode 100644 (file)
index 0000000..9efcab0
--- /dev/null
@@ -0,0 +1,258 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_export.c - ioam export API / debug CLI handling
+ *------------------------------------------------------------------
+ */
+
+#include <vnet/vnet.h>
+#include <vnet/plugin/plugin.h>
+#include <ioam/export/ioam_export.h>
+
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vnet/ip/ip6_hop_by_hop.h>
+
+#include "ioam_export.h"
+
+/* define message IDs */
+#include <ioam/export/ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* define generated endian-swappers */
+#define vl_endianfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__)
+#define vl_printfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_api_version
+
+/*
+ * A handy macro to set up a message reply.
+ * Assumes that the following variables are available:
+ * mp - pointer to request message
+ * rmp - pointer to reply message type
+ * rv - return value
+ */
+
+#define REPLY_MACRO(t)                                          \
+do {                                                            \
+    unix_shared_memory_queue_t * q =                            \
+    vl_api_client_index_to_input_queue (mp->client_index);      \
+    if (!q)                                                     \
+        return;                                                 \
+                                                                \
+    rmp = vl_msg_api_alloc (sizeof (*rmp));                     \
+    rmp->_vl_msg_id = ntohs((t)+sm->msg_id_base);               \
+    rmp->context = mp->context;                                 \
+    rmp->retval = ntohl(rv);                                    \
+                                                                \
+    vl_msg_api_send_shmem (q, (u8 *)&rmp);                      \
+} while(0);
+
+
+/* List of message types that this plugin understands, as _(MESSAGE_ID, handler_name) pairs */
+
+#define foreach_ioam_export_plugin_api_msg                        \
+_(IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable)
+
+/*
+ * Plugin registration hook.  Its presence convinces the vlib plugin
+ * framework that we haven't accidentally copied a random .dll into the
+ * plugin directory.
+ *
+ * It also stores the vlib, vnet and ethernet main pointers handed over by
+ * the vpp engine in ioam_export_main.  Always returns 0 (no error).
+ */
+
+clib_error_t *
+vlib_plugin_register (vlib_main_t * vm, vnet_plugin_handoff_t * h,
+                     int from_early_init)
+{
+  ioam_export_main.vlib_main = vm;
+  ioam_export_main.vnet_main = h->vnet_main;
+  ioam_export_main.ethernet_main = h->ethernet_main;
+
+  return 0;
+}
+
+/*
+ * Action function shared between message handler and debug CLI.
+ *
+ * Enable (is_disable == 0): build the export header from the two
+ * addresses, set up the per-thread export buffers, override the
+ * ip6-hop-by-hop next node with the export node slot and signal event 1
+ * to the export process node.
+ * Disable: restore the pop-hop-by-hop next node, release the header and
+ * the per-thread buffers and signal event 2 to the export process node.
+ *
+ * Returns 0 on success, -2 if the export header could not be created and
+ * -3 if the per-thread export buffers could not be set up.
+ */
+
+int
+ioam_export_ip6_enable_disable (ioam_export_main_t * em,
+                               u8 is_disable,
+                               ip4_address_t * collector_address,
+                               ip4_address_t * src_address)
+{
+  vlib_main_t *vm = em->vlib_main;
+
+  if (is_disable)
+    {
+      ip6_hbh_set_next_override (IP6_LOOKUP_NEXT_POP_HOP_BY_HOP);
+      ioam_export_header_cleanup (collector_address, src_address);
+      ioam_export_thread_buffer_free ();
+      /* Turn off the export buffer check process */
+      vlib_process_signal_event (vm, em->export_process_node_index, 2, 0);
+      return 0;
+    }
+
+  if (1 != ioam_export_header_create (collector_address, src_address))
+    return (-2);
+
+  /*
+   * Packets must not be steered to the export node unless the buffers
+   * they are collected in exist; undo the header on failure.
+   */
+  if (1 != ioam_export_thread_buffer_init (vm))
+    {
+      ioam_export_header_cleanup (collector_address, src_address);
+      return (-3);
+    }
+
+  ip6_hbh_set_next_override (em->my_hbh_slot);
+  /* Turn on the export buffer check process */
+  vlib_process_signal_event (vm, em->export_process_node_index, 1, 0);
+
+  return 0;
+}
+
+/* API message handler: applies the request in mp, then replies with the result via REPLY_MACRO */
+static void vl_api_ioam_export_ip6_enable_disable_t_handler
+  (vl_api_ioam_export_ip6_enable_disable_t * mp)
+{
+  vl_api_ioam_export_ip6_enable_disable_reply_t *rmp;  /* allocated and filled in by REPLY_MACRO */
+  ioam_export_main_t *sm = &ioam_export_main;  /* REPLY_MACRO reads sm->msg_id_base */
+  int rv;
+
+  rv = ioam_export_ip6_enable_disable (sm, (int) (mp->is_disable),
+                                      (ip4_address_t *) mp->
+                                      collector_address,
+                                      (ip4_address_t *) mp->src_address);  /* 4-byte arrays from the message read as ip4 addresses */
+
+  REPLY_MACRO (VL_API_IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY);  /* returns early, without replying, if the client queue is not found */
+}
+
+/* Set up the API message handling tables: one vl_msg_api_set_handlers() call per listed message, with ids offset by msg_id_base */
+static clib_error_t *
+ioam_export_plugin_api_hookup (vlib_main_t * vm)
+{
+  ioam_export_main_t *sm = &ioam_export_main;
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base),     \
+                           #n,                                 \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_ioam_export_plugin_api_msg;  /* expands _() once per message in the list */
+#undef _
+
+  return 0;  /* no failure path: always reports success */
+}
+
+/*
+ * Debug CLI handler for "set ioam export ipfix".
+ *
+ * Accepts "collector <ip4-address>", "src <ip4-address>" and "disable" in
+ * any order; parsing stops at the first unrecognised token.  Both
+ * addresses are required to enable export and are recorded in
+ * ioam_export_main before the feature is switched on.  Disabling needs no
+ * addresses.  A non-zero result from the enable/disable action is
+ * reported as a CLI error instead of being dropped.
+ */
+static clib_error_t *
+set_ioam_export_ipfix_command_fn (vlib_main_t * vm,
+                                 unformat_input_t * input,
+                                 vlib_cli_command_t * cmd)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  ip4_address_t collector, src;
+  u8 is_disable = 0;
+  int rv;
+
+  collector.as_u32 = 0;
+  src.as_u32 = 0;
+
+  while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (input, "collector %U", unformat_ip4_address, &collector))
+       ;
+      else if (unformat (input, "src %U", unformat_ip4_address, &src))
+       ;
+      else if (unformat (input, "disable"))
+       is_disable = 1;
+      else
+       break;
+    }
+
+  if (!is_disable)
+    {
+      if (collector.as_u32 == 0)
+       return clib_error_return (0, "collector address required");
+
+      if (src.as_u32 == 0)
+       return clib_error_return (0, "src address required");
+
+      em->ipfix_collector.as_u32 = collector.as_u32;
+      em->src_address.as_u32 = src.as_u32;
+
+      vlib_cli_output (vm, "Collector %U, src address %U",
+                      format_ip4_address, &em->ipfix_collector,
+                      format_ip4_address, &em->src_address);
+    }
+
+  rv = ioam_export_ip6_enable_disable (em, is_disable, &collector, &src);
+  if (rv != 0)
+    return clib_error_return (0, "ioam export %s failed, rv %d",
+                             is_disable ? "disable" : "enable", rv);
+
+  return 0;
+}
+
+/* Debug CLI registration for the iOAM export enable/disable command */
+VLIB_CLI_COMMAND (set_ipfix_command, static) =
+{
+  .path = "set ioam export ipfix",
+  .short_help =
+    "set ioam export ipfix collector <ip4-address> src <ip4-address> [disable]",
+  .function = set_ioam_export_ipfix_command_fn,
+};
+
+
+/*
+ * Plugin init function: reserves the API message id range, records the
+ * unix/vlib time pair captured at start-up, registers the API handlers
+ * and adds the export node as a next node of ip6-hop-by-hop.
+ *
+ * Returns the result of the API hookup, or an error when the
+ * ip6-hop-by-hop node cannot be found (previously a NULL dereference).
+ */
+static clib_error_t *
+ioam_export_init (vlib_main_t * vm)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  clib_error_t *error = 0;
+  u8 *name;
+  u32 node_index = export_node.index;
+  vlib_node_t *ip6_hbyh_node = NULL;
+
+  name = format (0, "ioam_export_%08x%c", api_version, 0);
+
+  /* Ask for a correctly-sized block of API message decode slots */
+  em->msg_id_base = vl_msg_api_get_msg_ids
+    ((char *) name, VL_MSG_FIRST_AVAILABLE);
+  vec_free (name);
+
+  em->unix_time_0 = (u32) time (0);    /* Store starting time */
+  em->vlib_time_0 = vlib_time_now (vm);
+
+  error = ioam_export_plugin_api_hookup (vm);
+
+  /* Hook this export node to ip6-hop-by-hop */
+  ip6_hbyh_node = vlib_get_node_by_name (vm, (u8 *) "ip6-hop-by-hop");
+  if (ip6_hbyh_node == NULL)
+    return clib_error_return (0, "ip6-hop-by-hop node not found");
+  em->my_hbh_slot = vlib_node_add_next (vm, ip6_hbyh_node->index, node_index);
+
+  return error;
+}
+
+VLIB_INIT_FUNCTION (ioam_export_init);
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export.h b/plugins/ioam-plugin/ioam/export/ioam_export.h
new file mode 100644 (file)
index 0000000..f4a461f
--- /dev/null
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef __included_ioam_export_h__
+#define __included_ioam_export_h__
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <vnet/ip/ip_packet.h>
+#include <vnet/ip/ip4_packet.h>
+#include <vnet/ip/ip6_packet.h>
+#include <vnet/ip/udp.h>
+#include <vnet/flow/ipfix_packet.h>
+
+#include <vppinfra/pool.h>
+#include <vppinfra/hash.h>
+#include <vppinfra/error.h>
+#include <vppinfra/elog.h>
+
+#include <vlib/threads.h>
+
+typedef struct ioam_export_buffer {
+  /* Allocated buffer: index of the vlib buffer records are collected in */
+  u32 buffer_index;
+  u64 touched_at;             /* set from vlib_time_now() when the buffer is initialised */
+  u8 records_in_this_buffer;  /* reset to 0 on init; compared with DEFAULT_EXPORT_RECORDS at send time */
+} ioam_export_buffer_t;
+
+
+typedef struct {
+  /* API message ID base */
+  u16 msg_id_base;
+
+  /* TODO: to support multiple collectors all this has to be grouped and create a vector here*/
+  u8 *record_header;     /* header template vector copied to the start of each export buffer */
+  u32 sequence_number;   /* written to the IPFIX message header and incremented per buffer sent */
+  u32 domain_id;         /* written to the IPFIX message header when the template is built */
+
+  /* ipfix collector, our ip address */
+  ip4_address_t ipfix_collector;
+  ip4_address_t src_address;
+
+  /* Pool of ioam_export_buffer_t */
+  ioam_export_buffer_t *buffer_pool;
+  /* Vector of per thread ioam_export_buffer_t to buffer pool index */
+  u32 *buffer_per_thread;
+  /* Lock per thread to swap buffers between worker and timer process*/
+  volatile u32 **lockp;
+
+  /* time scale transform: unix time and vlib time captured together at init */
+  u32 unix_time_0;
+  f64 vlib_time_0;
+
+  /* convenience */
+  vlib_main_t * vlib_main;
+  vnet_main_t * vnet_main;
+  ethernet_main_t * ethernet_main;
+  u32 ip4_lookup_node_index;   /* node that finished export buffers are enqueued to */
+
+  uword my_hbh_slot;               /* next index of the export node under ip6-hop-by-hop */
+  u32 export_process_node_index;   /* process node signalled with event 1 (enable) / 2 (disable) */
+} ioam_export_main_t;
+
+ioam_export_main_t ioam_export_main;   /* NOTE(review): object defined (not declared extern) in a header included by several .c files — confirm this is intended */
+
+vlib_node_registration_t export_node;  /* presumably the export graph node registration from node.c — verify; same header-definition caveat */
+
+#define DEFAULT_EXPORT_SIZE (3 * CLIB_CACHE_LINE_BYTES)
+/*
+ *  Number of records in a buffer, each record being DEFAULT_EXPORT_SIZE bytes
+ * ~(MTU (1500) - [ip hdr(40) + UDP(8) + ipfix (24)]) / DEFAULT_EXPORT_SIZE
+ * NOTE(review): the header template in this file uses ip4_header_t, so the 40-byte ip hdr figure looks like an IPv6 size — confirm the arithmetic */
+#define DEFAULT_EXPORT_RECORDS 7
+
+/*
+ * Look up the export buffer that belongs to a thread.
+ * Returns the pool element mapped to thread_id, or 0 when thread_id is
+ * beyond the end of the per-thread index vector.
+ */
+always_inline ioam_export_buffer_t *ioam_export_get_my_buffer(u32 thread_id)
+{
+  ioam_export_main_t *main_p = &ioam_export_main;
+  u32 pool_index;
+
+  if (thread_id >= vec_len(main_p->buffer_per_thread))
+    return(0);
+
+  pool_index = main_p->buffer_per_thread[thread_id];
+  return(pool_elt_at_index(main_p->buffer_pool, pool_index));
+}
+
+/*
+ * Copy the export header template to the start of b0's data area and make
+ * the buffer's current data/length cover exactly that header.
+ * Always returns 1.
+ */
+inline static int ioam_export_buffer_add_header (vlib_buffer_t *b0)
+{
+  u8 *hdr = ioam_export_main.record_header;
+  uword hdr_len = vec_len(hdr);
+
+  clib_memcpy(b0->data, hdr, hdr_len);
+  b0->current_data = 0;
+  b0->current_length = hdr_len;
+  b0->flags |= VLIB_BUFFER_TOTAL_LENGTH_VALID;
+  return(1);
+}
+
+/*
+ * Give an export buffer descriptor a fresh vlib buffer that already
+ * carries the export header.
+ *
+ * Returns 1 on success, -1 when eb is null and -2 when no vlib buffer
+ * could be allocated.
+ */
+inline static int ioam_export_init_buffer (vlib_main_t *vm,
+                                          ioam_export_buffer_t *eb)
+{
+  vlib_buffer_t *buf;
+
+  if (eb == 0)
+    return(-1);
+
+  /* TODO: Perhaps buffer init from template here */
+  if (1 != vlib_buffer_alloc (vm, &eb->buffer_index, 1))
+    return(-2);
+
+  eb->records_in_this_buffer = 0;
+  eb->touched_at = vlib_time_now(vm);
+
+  buf = vlib_get_buffer(vm, eb->buffer_index);
+  (void) ioam_export_buffer_add_header(buf);
+  vnet_buffer(buf)->sw_if_index[VLIB_RX] = 0;
+  vnet_buffer(buf)->sw_if_index[VLIB_TX] = ~0;
+  return(1);
+}
+
+/*
+ * Release everything ioam_export_thread_buffer_init set up: the vlib
+ * buffer of each per-thread entry, the per-thread lock words, and the
+ * vectors and pool that hold them.  Safe to call when nothing has been
+ * allocated (all vectors empty).
+ *
+ * Lock slots that were never filled in (still 0 after a partial init) are
+ * skipped rather than handed to clib_mem_free.
+ * NOTE(review): every entry of buffer_per_thread is assumed to refer to a
+ * pool element that owns an allocated vlib buffer — confirm for callers
+ * that free after a partial init.
+ */
+inline static void ioam_export_thread_buffer_free (void)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  vlib_main_t *vm = em->vlib_main;
+  ioam_export_buffer_t *eb = 0;
+  int i;
+  for (i = 0; i < vec_len(em->buffer_per_thread); i++)
+    {
+      eb = pool_elt_at_index(em->buffer_pool, em->buffer_per_thread[i]);
+      if (eb)
+       vlib_buffer_free(vm, &(eb->buffer_index), 1);
+    }
+  for (i = 0; i < vec_len(em->lockp); i++)
+    {
+      if (em->lockp[i])
+       clib_mem_free((void *) em->lockp[i]);
+    }
+  vec_free(em->buffer_per_thread);
+  pool_free(em->buffer_pool);
+  vec_free(em->lockp);
+  em->buffer_per_thread = 0;
+  em->buffer_pool = 0;
+  em->lockp = 0;
+}
+
+/*
+ * Create one export buffer and one cache-line sized lock word per entry
+ * of vlib_worker_threads, and remember the ip4-lookup node index that
+ * finished buffers are sent to.
+ *
+ * Returns 1 on success, -1 if the ip4-lookup node or the bookkeeping
+ * vectors are unavailable, and -2 if a vlib buffer could not be set up
+ * for some thread (everything allocated so far is released first).
+ */
+inline static int ioam_export_thread_buffer_init (vlib_main_t *vm)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  int no_of_threads = vec_len(vlib_worker_threads);
+  int i;
+  ioam_export_buffer_t *eb = 0;
+  vlib_node_t * ip4_lookup_node;
+
+  /* Resolve the target node first so nothing is allocated if it is missing */
+  ip4_lookup_node = vlib_get_node_by_name (vm, (u8 *) "ip4-lookup");
+  if (!ip4_lookup_node)
+    return(-1);
+  em->ip4_lookup_node_index = ip4_lookup_node->index;
+
+  /* One pool element is taken per thread below, so reserve that many */
+  pool_alloc_aligned(em->buffer_pool,
+                     no_of_threads,
+                     CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned(em->buffer_per_thread,
+                       no_of_threads-1,
+                       CLIB_CACHE_LINE_BYTES);
+  vec_validate_aligned(em->lockp, no_of_threads-1,
+                      CLIB_CACHE_LINE_BYTES);
+  if (!em->buffer_per_thread || !em->buffer_pool || !em->lockp)
+    {
+      return(-1);
+    }
+  for (i=0; i < no_of_threads; i++)
+    {
+      eb = 0;
+      pool_get_aligned(em->buffer_pool, eb, CLIB_CACHE_LINE_BYTES);
+      memset(eb, 0, sizeof (*eb));
+      em->buffer_per_thread[i] = eb - em->buffer_pool;
+      if (ioam_export_init_buffer(vm, eb) != 1)
+       {
+         /*
+          * Only threads 0..i-1 own a vlib buffer and a lock word; trim
+          * the vectors so the cleanup does not touch the other entries.
+          */
+         _vec_len(em->buffer_per_thread) = i;
+         _vec_len(em->lockp) = i;
+         ioam_export_thread_buffer_free();
+         return(-2);
+       }
+      em->lockp[i] = clib_mem_alloc_aligned (CLIB_CACHE_LINE_BYTES,
+                                            CLIB_CACHE_LINE_BYTES);
+      memset ((void *) em->lockp[i], 0, CLIB_CACHE_LINE_BYTES);
+    }
+  return(1);
+}
+
+#define IPFIX_IOAM_EXPORT_ID 272   /* set id written into the IPFIX set header of exported data */
+
+/* Used to build the rewrite */
+/* data set packet: IPFIX message header followed by one set header */
+typedef struct {
+  ipfix_message_header_t h;
+  ipfix_set_header_t s;
+} ipfix_data_packet_t;
+
+typedef struct {
+  ip4_header_t ip4;
+  udp_header_t udp;
+  ipfix_data_packet_t ipfix;
+} ip4_ipfix_data_packet_t;   /* whole header template: IPv4 + UDP + IPFIX headers */
+
+
+/*
+ * Free the export header template and clear the pointer to it.
+ * Both address arguments are accepted for symmetry with
+ * ioam_export_header_create but are not used.
+ */
+inline static void ioam_export_header_cleanup (ip4_address_t * collector_address,
+                                              ip4_address_t * src_address)
+{
+  vec_free(ioam_export_main.record_header);
+  ioam_export_main.record_header = 0;
+}
+
+/*
+ * Build the header template that is copied to the front of every export
+ * buffer and store it in ioam_export_main.record_header.
+ *
+ * The template is an IPv4 + UDP + IPFIX message/set header whose length
+ * fields are pre-computed for a buffer carrying DEFAULT_EXPORT_RECORDS
+ * records of DEFAULT_EXPORT_SIZE bytes each.  Export time and sequence
+ * number are not set here.  Always returns 1.
+ */
+inline static int ioam_export_header_create (ip4_address_t * collector_address,
+                                            ip4_address_t * src_address)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  u8 * hdr = 0;
+  ip4_ipfix_data_packet_t * pkt;
+  ip4_header_t * ip4h;
+  udp_header_t * udph;
+  ipfix_message_header_t * msgh;
+  ipfix_set_header_t * seth;
+  uword records_bytes = DEFAULT_EXPORT_RECORDS * DEFAULT_EXPORT_SIZE;
+
+  /* allocate rewrite space */
+  vec_validate_aligned (hdr,
+                        sizeof (ip4_ipfix_data_packet_t) - 1,
+                        CLIB_CACHE_LINE_BYTES);
+
+  /* each header is located directly behind the previous one */
+  pkt = (ip4_ipfix_data_packet_t *) hdr;
+  ip4h = (ip4_header_t *) &pkt->ip4;
+  udph = (udp_header_t *) (ip4h+1);
+  msgh = (ipfix_message_header_t *)(udph+1);
+  seth = (ipfix_set_header_t *)(msgh+1);
+
+  /* IPFIX headers */
+  /* FIXUP: message header export_time */
+  /* FIXUP: message header sequence_number */
+  msgh->domain_id = clib_host_to_net_u32 (em->domain_id);
+  /*FIXUP: Setid length in octets if records exported are not default*/
+  seth->set_id_length = ipfix_set_id_length (IPFIX_IOAM_EXPORT_ID,
+    (sizeof(*seth) + records_bytes));
+  /* FIXUP: h version and length length in octets if records exported are not default */
+  msgh->version_length = version_length (sizeof(*msgh)+
+    (sizeof(*seth) + records_bytes));
+
+  /* UDP header */
+  udph->src_port = clib_host_to_net_u16 (4939 /* $$FIXME */);
+  udph->dst_port = clib_host_to_net_u16 (4939);
+  /* FIXUP: UDP length */
+  udph->length = clib_host_to_net_u16 (vec_len(hdr) +
+    records_bytes - sizeof (*ip4h));
+
+  /* IPv4 header; the checksum goes last, once all other fields are set */
+  ip4h->ip_version_and_header_length = 0x45;
+  ip4h->ttl = 254;
+  ip4h->protocol = IP_PROTOCOL_UDP;
+  ip4h->src_address.as_u32 = src_address->as_u32;
+  ip4h->dst_address.as_u32 = collector_address->as_u32;
+  /* FIXUP: ip length if records exported are not default */
+  /* FIXUP: ip checksum if records exported are not default */
+  ip4h->length = clib_host_to_net_u16 (vec_len(hdr) +
+    records_bytes);
+  ip4h->checksum = ip4_header_checksum (ip4h);
+
+  _vec_len(hdr) = sizeof(ip4_ipfix_data_packet_t);
+  em->record_header = hdr;
+  return(1);
+}
+
+inline static int ioam_export_send_buffer (vlib_main_t *vm,
+    ioam_export_buffer_t *eb)   /* finalises the headers in eb's buffer and enqueues it to ip4-lookup; always returns 1 */
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  ip4_header_t * ip;
+  udp_header_t * udp;
+  ipfix_message_header_t * h;
+  ipfix_set_header_t * s;
+  ip4_ipfix_data_packet_t * tp;
+  vlib_buffer_t *b0;
+  u16 new_l0, old_l0;
+  ip_csum_t sum0;
+  vlib_frame_t * nf = 0;
+  u32 * to_next;
+
+  b0 = vlib_get_buffer(vm, eb->buffer_index);
+  tp = vlib_buffer_get_current (b0);   /* the header template sits at the buffer's current data */
+  ip = (ip4_header_t *) &tp->ip4;
+  udp = (udp_header_t *) (ip+1);
+  h = (ipfix_message_header_t *)(udp+1);
+  s = (ipfix_set_header_t *)(h+1);
+
+  /* FIXUP: message header export_time = unix time at init + vlib time elapsed since init */
+  h->export_time = clib_host_to_net_u32((u32)
+    (((f64)em->unix_time_0) +
+    (vlib_time_now(em->vlib_main) - em->vlib_time_0)));
+
+  /* FIXUP: message header sequence_number (post-incremented for every buffer sent) */
+  h->sequence_number = clib_host_to_net_u32 (em->sequence_number++);
+
+  /* FIXUP: lengths if different from default; they are recomputed from b0->current_length */
+  if (PREDICT_FALSE(eb->records_in_this_buffer != DEFAULT_EXPORT_RECORDS)) {
+     s->set_id_length = ipfix_set_id_length (IPFIX_IOAM_EXPORT_ID /* set_id */,
+                                            b0->current_length -
+                                            (sizeof (*ip) + sizeof (*udp) +
+                                            sizeof (*h)));
+     h->version_length = version_length (b0->current_length -
+                                        (sizeof (*ip) + sizeof (*udp)));
+     sum0 = ip->checksum;
+     old_l0 = ip->length;   /* old length must be read before ip->length is overwritten below */
+     new_l0 = clib_host_to_net_u16 ((u16)b0->current_length);
+     sum0 = ip_csum_update (sum0, old_l0, new_l0, ip4_header_t,
+                            length /* changed member */);
+     ip->checksum = ip_csum_fold (sum0);
+     ip->length = new_l0;
+     udp->length = clib_host_to_net_u16 (b0->current_length - sizeof (*ip));
+  }
+
+  /* Enqueue pkts to ip4-lookup: the buffer goes out alone in a frame of its own */
+
+  nf = vlib_get_frame_to_node (vm, em->ip4_lookup_node_index);
+  nf->n_vectors = 0;
+  to_next = vlib_frame_vector_args (nf);
+  nf->n_vectors = 1;
+  to_next[0] = eb->buffer_index;
+  vlib_put_frame_to_node(vm, em->ip4_lookup_node_index, nf);
+  return(1);
+
+}
+
+#endif /* __included_ioam_export_h__ */
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export_all_api_h.h b/plugins/ioam-plugin/ioam/export/ioam_export_all_api_h.h
new file mode 100644 (file)
index 0000000..bc4368f
--- /dev/null
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* Include the generated file, see BUILT_SOURCES in Makefile.am */
+#include <ioam/export/ioam_export.api.h>
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export_msg_enum.h b/plugins/ioam-plugin/ioam/export/ioam_export_msg_enum.h
new file mode 100644 (file)
index 0000000..c2de798
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_ioam_export_msg_enum_h
+#define included_ioam_export_msg_enum_h
+
+#include <vppinfra/byte_order.h>
+
+#define vl_msg_id(n,h) n,
+typedef enum {
+#include <ioam/export/ioam_export_all_api_h.h>
+    /* We'll want to know how many messages IDs we need: this last enumerator is that count */
+    VL_MSG_FIRST_AVAILABLE,
+} vl_msg_id_t;
+#undef vl_msg_id
+
+#endif /* included_ioam_export_msg_enum_h */
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export_test.c b/plugins/ioam-plugin/ioam/export/ioam_export_test.c
new file mode 100644 (file)
index 0000000..695ce38
--- /dev/null
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ *------------------------------------------------------------------
+ * ioam_export_test.c - test harness plugin
+ *------------------------------------------------------------------
+ */
+
+#include <vat/vat.h>
+#include <vlibapi/api.h>
+#include <vlibmemory/api.h>
+#include <vlibsocket/api.h>
+#include <vppinfra/error.h>
+
+
+/* Declare message IDs */
+#include <ioam/export/ioam_export_msg_enum.h>
+
+/* define message structures */
+#define vl_typedefs
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_typedefs
+
+/* declare message handlers for each api */
+
+#define vl_endianfun           /* define message structures */
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_endianfun
+
+/* instantiate all the print functions we know about */
+#define vl_print(handle, ...)
+#define vl_printfun
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_printfun
+
+/* Get the API version number. */
+#define vl_api_version(n,v) static u32 api_version=(v);
+#include <ioam/export/ioam_export_all_api_h.h>
+#undef vl_api_version
+
+
+typedef struct
+{
+  /* API message ID base, obtained from vl_client_get_first_plugin_msg_id() at registration */
+  u16 msg_id_base;
+  vat_main_t *vat_main;   /* set in vat_plugin_register; read by the generated reply handlers */
+} export_test_main_t;
+
+export_test_main_t export_test_main;
+
+#define foreach_standard_reply_retval_handler   \
+_(ioam_export_ip6_enable_disable_reply)
+
+#define _(n)                                            \
+    static void vl_api_##n##_t_handler                  \
+    (vl_api_##n##_t * mp)                               \
+    {                                                   \
+        vat_main_t * vam = export_test_main.vat_main;   \
+        i32 retval = ntohl(mp->retval);                 \
+        if (vam->async_mode) {                          \
+            vam->async_errors += (retval < 0);          \
+        } else {                                        \
+            vam->retval = retval;                       \
+            vam->result_ready = 1;                      \
+        }                                               \
+    }
+foreach_standard_reply_retval_handler;   /* emits one handler per listed reply: it records retval, or counts errors in async mode */
+#undef _
+
+/*
+ * Table of message reply handlers, must include boilerplate handlers
+ * we just generated; entries are _(MESSAGE_ID, handler_name) pairs
+ */
+#define foreach_vpe_api_reply_msg                                       \
+_(IOAM_EXPORT_IP6_ENABLE_DISABLE_REPLY, ioam_export_ip6_enable_disable_reply)
+
+
+/* M: construct, but don't yet send a message */
+
+#define M(T,t)                                                  \
+do {                                                            \
+    vam->result_ready = 0;                                      \
+    mp = vl_msg_api_alloc(sizeof(*mp));                         \
+    memset (mp, 0, sizeof (*mp));                               \
+    mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base);      \
+    mp->client_index = vam->my_client_index;                    \
+} while(0);
+
+#define M2(T,t,n)                                               \
+do {                                                            \
+    vam->result_ready = 0;                                      \
+    mp = vl_msg_api_alloc(sizeof(*mp)+(n));                     \
+    memset (mp, 0, sizeof (*mp));                               \
+    mp->_vl_msg_id = ntohs (VL_API_##T + sm->msg_id_base);      \
+    mp->client_index = vam->my_client_index;                    \
+} while(0);
+
+/* S: send a message */
+#define S (vl_msg_api_send_shmem (vam->vl_input_queue, (u8 *)&mp))
+
+/* W: wait for results, with timeout */
+#define W                                       \
+do {                                            \
+    timeout = vat_time_now (vam) + 1.0;         \
+                                                \
+    while (vat_time_now (vam) < timeout) {      \
+        if (vam->result_ready == 1) {           \
+            return (vam->retval);               \
+        }                                       \
+    }                                           \
+    return -99;                                 \
+} while(0);
+
+static int
+api_ioam_export_ip6_enable_disable (vat_main_t * vam)
+{
+  export_test_main_t *sm = &export_test_main;   /* the M macro reads sm->msg_id_base */
+  unformat_input_t *i = vam->input;
+  f64 timeout;   /* used by the W macro */
+  int is_disable = 0;
+  vl_api_ioam_export_ip6_enable_disable_t *mp;
+
+  /* Parse args required to build the message; only "disable" is recognised */
+  while (unformat_check_input (i) != UNFORMAT_END_OF_INPUT)
+    {
+      if (unformat (i, "disable"))
+       is_disable = 1;
+      else
+       break;
+    }
+
+  /* Construct the API message; M zeroes it, so both address fields stay all-zero */
+  M (IOAM_EXPORT_IP6_ENABLE_DISABLE, ioam_export_ip6_enable_disable);
+  mp->is_disable = is_disable;
+
+  /* send it... */
+  S;
+
+  /* Wait for a reply... W returns the reply's retval, or -99 on timeout */
+  W;
+}
+
+/*
+ * List of messages that the api test plugin sends,
+ * and that the data plane plugin processes.
+ * Each entry is _(function_name, help string); the help string lists
+ * only the arguments the api_* function actually parses.
+ */
+#define foreach_vpe_api_msg \
+_(ioam_export_ip6_enable_disable, "[disable]")
+
+void
+vat_api_hookup (vat_main_t * vam)
+{
+  export_test_main_t *sm = &export_test_main;
+  /* Hook up handlers for replies from the data plane plug-in, with ids offset by msg_id_base */
+#define _(N,n)                                                  \
+    vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base),     \
+                           #n,                                  \
+                           vl_api_##n##_t_handler,              \
+                           vl_noop_handler,                     \
+                           vl_api_##n##_t_endian,               \
+                           vl_api_##n##_t_print,                \
+                           sizeof(vl_api_##n##_t), 1);
+  foreach_vpe_api_reply_msg;
+#undef _
+
+  /* API messages we can send: maps each message name to its api_<name> function */
+#define _(n,h) hash_set_mem (vam->function_by_name, #n, api_##n);
+  foreach_vpe_api_msg;
+#undef _
+
+  /* Help strings, keyed by the same message names */
+#define _(n,h) hash_set_mem (vam->help_by_name, #n, h);
+  foreach_vpe_api_msg;
+#undef _
+}
+
+/*
+ * VAT plugin entry point: remembers the vat main pointer, looks up the
+ * message id base registered under the plugin's "ioam_export_<version>"
+ * name, and hooks up the handlers only when that lookup succeeds
+ * (base != (u16) ~0).  Always returns 0.
+ */
+clib_error_t *
+vat_plugin_register (vat_main_t * vam)
+{
+  export_test_main_t *sm = &export_test_main;
+  u8 *name;
+
+  sm->vat_main = vam;
+
+  /*
+   * Must be the same string the data plane plugin passes to
+   * vl_msg_api_get_msg_ids(), otherwise the id base is never found.
+   */
+  name = format (0, "ioam_export_%08x%c", api_version, 0);
+  sm->msg_id_base = vl_client_get_first_plugin_msg_id ((char *) name);
+
+  if (sm->msg_id_base != (u16) ~ 0)
+    vat_api_hookup (vam);
+
+  vec_free (name);
+
+  return 0;
+}
diff --git a/plugins/ioam-plugin/ioam/export/ioam_export_thread.c b/plugins/ioam-plugin/ioam/export/ioam_export_thread.c
new file mode 100644 (file)
index 0000000..e64b0bf
--- /dev/null
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ioam_export_thread.c
+ */
+#include <vnet/api_errno.h>
+#include <vppinfra/pool.h>
+#include "ioam_export.h"
+
+static vlib_node_registration_t ioam_export_process_node;
+#define EXPORT_TIMEOUT (20.0)
+#define THREAD_PERIOD (30.0)
+
+/*
+ * Export process node.
+ *
+ * After an initial kickoff event, wakes up on an event or after the current
+ * timeout and looks at every per-thread export buffer.  A buffer that holds
+ * records and was last touched more than EXPORT_TIMEOUT seconds ago is
+ * swapped for a freshly initialised one (under that thread's lock) and then
+ * sent.
+ *
+ * Events: 1 sets the wake-up period to THREAD_PERIOD, 2 sets a very long
+ * timeout until the next event, ~0 is the clock timeout.  The buffer scan
+ * runs on every wake-up, including the one that handles event 2.
+ * The loop never exits.
+ */
+static uword
+ioam_export_process (vlib_main_t * vm,
+                    vlib_node_runtime_t * rt, vlib_frame_t * f)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  f64 now;
+  f64 timeout = THREAD_PERIOD;
+  uword event_type;
+  uword *event_data = 0;
+  int i;
+  ioam_export_buffer_t *eb = 0, *new_eb = 0;
+  u32 *vec_buffer_indices = 0;
+  u32 *vec_buffer_to_be_sent = 0;
+  u32 *thread_index = 0;
+  u32 new_pool_index = 0;
+
+  em->export_process_node_index = ioam_export_process_node.index;
+  /* Wait for Godot... */
+  vlib_process_wait_for_event_or_clock (vm, 1e9);
+  event_type = vlib_process_get_events (vm, &event_data);
+  if (event_type != 1)
+    /* event_type is a uword; cast so the argument matches the %d conversion */
+    clib_warning ("bogus kickoff event received, %d", (int) event_type);
+  vec_reset_length (event_data);
+
+  while (1)
+    {
+      vlib_process_wait_for_event_or_clock (vm, timeout);
+      event_type = vlib_process_get_events (vm, &event_data);
+      switch (event_type)
+       {
+       case 2:         /* Stop and Wait for kickoff again */
+         timeout = 1e9;
+         break;
+       case 1:         /* kickoff : Check for unsent buffers */
+         timeout = THREAD_PERIOD;
+         break;
+       case ~0:                /* timeout */
+         break;
+       default:                /* any other event: keep the current timeout */
+         break;
+       }
+      vec_reset_length (event_data);
+      now = vlib_time_now (vm);
+      /*
+       * Create buffers for threads that are not active enough
+       * to send out the export records
+       */
+      for (i = 0; i < vec_len (em->buffer_per_thread); i++)
+       {
+         /* If the worker thread is processing export records ignore further checks */
+         if (*em->lockp[i] == 1)
+           continue;
+         eb = pool_elt_at_index (em->buffer_pool, em->buffer_per_thread[i]);
+         if (eb->records_in_this_buffer > 0 && now > (eb->touched_at + EXPORT_TIMEOUT))
+           {
+             pool_get_aligned (em->buffer_pool, new_eb,
+                               CLIB_CACHE_LINE_BYTES);
+             memset (new_eb, 0, sizeof (*new_eb));
+             if (ioam_export_init_buffer (vm, new_eb) == 1)
+               {
+                 /* Remember the replacement, the stale buffer and its thread */
+                 new_pool_index = new_eb - em->buffer_pool;
+                 vec_add (vec_buffer_indices, &new_pool_index, 1);
+                 vec_add (vec_buffer_to_be_sent, &em->buffer_per_thread[i],
+                          1);
+                 vec_add (thread_index, &i, 1);
+               }
+             else
+               {
+                 pool_put (em->buffer_pool, new_eb);
+                 /*Give up */
+                 goto CLEANUP;
+               }
+           }
+       }
+      if (vec_len (thread_index) != 0)
+       {
+         /*
+          * Now swap the buffers out
+          */
+         for (i = 0; i < vec_len (thread_index); i++)
+           {
+             /* Spin for the thread's lock, install a new buffer, release */
+             while (__sync_lock_test_and_set (em->lockp[thread_index[i]], 1))
+               ;
+             em->buffer_per_thread[thread_index[i]] =
+               vec_pop (vec_buffer_indices);
+             *em->lockp[thread_index[i]] = 0;
+           }
+
+         /* Send the buffers */
+         for (i = 0; i < vec_len (vec_buffer_to_be_sent); i++)
+           {
+             eb =
+               pool_elt_at_index (em->buffer_pool, vec_buffer_to_be_sent[i]);
+             ioam_export_send_buffer (vm, eb);
+             pool_put (em->buffer_pool, eb);
+           }
+       }
+
+    CLEANUP:
+      /* Free any leftover/unused buffers and everything that was allocated */
+      for (i = 0; i < vec_len (vec_buffer_indices); i++)
+       {
+         new_eb = pool_elt_at_index (em->buffer_pool, vec_buffer_indices[i]);
+         vlib_buffer_free (vm, &new_eb->buffer_index, 1);
+         pool_put (em->buffer_pool, new_eb);
+       }
+      vec_free (vec_buffer_indices);
+      vec_free (vec_buffer_to_be_sent);
+      vec_free (thread_index);
+    }
+  return 0;                    /* not so much */
+}
+
+/* Registers ioam_export_process as the process node "ioam-export-process" */
+VLIB_REGISTER_NODE (ioam_export_process_node, static) =
+{
+ .function = ioam_export_process,
+ .type = VLIB_NODE_TYPE_PROCESS,
+ .name = "ioam-export-process",
+};
diff --git a/plugins/ioam-plugin/ioam/export/node.c b/plugins/ioam-plugin/ioam/export/node.c
new file mode 100644 (file)
index 0000000..484bcb5
--- /dev/null
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2016 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <string.h>
+
+#include <vlib/vlib.h>
+#include <vnet/vnet.h>
+#include <vnet/pg/pg.h>
+#include <vppinfra/error.h>
+#include <vnet/ip/ip.h>
+#include <ioam/export/ioam_export.h>
+
+/* Per-packet trace record written by the ip6-export node */
+typedef struct
+{
+  u32 next_index;              /* next index selected for the packet */
+  u32 flow_label;              /* host-order ip_version_traffic_class_and_flow_label word */
+} export_trace_t;
+
+/*
+ * Packet trace formatter: appends the flow label word and next index of an
+ * export_trace_t to s and returns the resulting vector.
+ */
+static u8 *
+format_export_trace (u8 * s, va_list * args)
+{
+  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
+  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
+  export_trace_t *rec = va_arg (*args, export_trace_t *);
+
+  return format (s, "EXPORT: flow_label %d, next index %d",
+                rec->flow_label, rec->next_index);
+}
+
+vlib_node_registration_t export_node;
+
+/* Counter list for the export node: one _(symbol, description) per entry */
+#define foreach_export_error \
+_(RECORDED, "Packets recorded for export")
+
+/* Counter indices: one EXPORT_ERROR_<symbol> per list entry */
+typedef enum
+{
+#define _(sym,str) EXPORT_ERROR_##sym,
+  foreach_export_error
+#undef _
+    EXPORT_N_ERROR,
+} export_error_t;
+
+/* Counter descriptions, in the same order as export_error_t */
+static char *export_error_strings[] = {
+#define _(sym,string) string,
+  foreach_export_error
+#undef _
+};
+
+/* Next-node indices used by the export node */
+typedef enum
+{
+  EXPORT_NEXT_POP_HBYH,        /* mapped to "ip6-pop-hop-by-hop" in the node registration */
+  EXPORT_N_NEXT,               /* number of next nodes */
+} export_next_t;
+
+/*
+ * Copy one export record from src into the record slot at dst.
+ *
+ * n is the number of valid bytes at src.  When n is at least
+ * DEFAULT_EXPORT_SIZE, three 64-byte lines are copied with unrolled u64
+ * moves.  A shorter input is copied byte-exact and the remainder of the
+ * DEFAULT_EXPORT_SIZE slot is zero-filled: callers advance the export
+ * buffer by DEFAULT_EXPORT_SIZE per record regardless of n, so the slot
+ * must not keep a truncated packet or bytes left over from earlier use.
+ */
+always_inline void
+copy3cachelines (void *dst, const void *src, size_t n)
+{
+  u64 *copy_dst = (u64 *) dst;
+  const u64 *copy_src = (const u64 *) src;
+  int i;
+
+  if (PREDICT_FALSE (n < DEFAULT_EXPORT_SIZE))
+    {
+      /* Short packet: keep every valid byte, pad the rest of the slot */
+      memcpy (dst, src, n);
+      memset ((u8 *) dst + n, 0, DEFAULT_EXPORT_SIZE - n);
+      return;
+    }
+
+  /* Common case: three lines of eight u64 words each */
+  for (i = 0; i < 3; i++)
+    {
+      copy_dst[0] = copy_src[0];
+      copy_dst[1] = copy_src[1];
+      copy_dst[2] = copy_src[2];
+      copy_dst[3] = copy_src[3];
+      copy_dst[4] = copy_src[4];
+      copy_dst[5] = copy_src[5];
+      copy_dst[6] = copy_src[6];
+      copy_dst[7] = copy_src[7];
+      copy_dst += 8;
+      copy_src += 8;
+    }
+}
+
+/*
+ * Export node function.
+ *
+ * For every packet in the frame, copies up to DEFAULT_EXPORT_SIZE bytes
+ * starting at the IPv6 header into this thread's export buffer and enqueues
+ * the packet to EXPORT_NEXT_POP_HBYH.  Once the export buffer holds
+ * DEFAULT_EXPORT_RECORDS records it is passed to ioam_export_send_buffer()
+ * and re-initialised with ioam_export_init_buffer().
+ *
+ * The lock em->lockp[vm->cpu_index] is held from before the first packet
+ * until after the counter update.  Returns frame->n_vectors.
+ */
+static uword
+ip6_export_node_fn (vlib_main_t * vm,
+                   vlib_node_runtime_t * node, vlib_frame_t * frame)
+{
+  ioam_export_main_t *em = &ioam_export_main;
+  u32 n_left_from, *from, *to_next;
+  export_next_t next_index;
+  u32 pkts_recorded = 0;
+  ioam_export_buffer_t *my_buf = 0;
+  vlib_buffer_t *eb0 = 0;
+  u32 ebi0 = 0;
+  from = vlib_frame_vector_args (frame);
+  n_left_from = frame->n_vectors;
+  next_index = node->cached_next_index;
+
+  /* Spin until the lock for this cpu_index is acquired */
+  while (__sync_lock_test_and_set (em->lockp[vm->cpu_index], 1))
+    ;
+  my_buf = ioam_export_get_my_buffer (vm->cpu_index);
+  /* Timestamp read by ioam_export_process to decide whether the buffer is stale */
+  my_buf->touched_at = vlib_time_now (vm);
+  while (n_left_from > 0)
+    {
+      u32 n_left_to_next;
+
+      vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
+      /* Dual loop: two packets per iteration, prefetching the following two */
+      while (n_left_from >= 4 && n_left_to_next >= 2)
+       {
+         u32 next0 = EXPORT_NEXT_POP_HBYH;
+         u32 next1 = EXPORT_NEXT_POP_HBYH;
+         u32 bi0, bi1;
+         ip6_header_t *ip60, *ip61;
+         vlib_buffer_t *p0, *p1;
+         u32 ip_len0, ip_len1;
+
+         /* Prefetch next iteration. */
+         {
+           vlib_buffer_t *p2, *p3;
+
+           p2 = vlib_get_buffer (vm, from[2]);
+           p3 = vlib_get_buffer (vm, from[3]);
+
+           vlib_prefetch_buffer_header (p2, LOAD);
+           vlib_prefetch_buffer_header (p3, LOAD);
+
+           /* IPv6 + HbyH header + Trace option */
+           /* 40   +           2 + [4 hdr] + [16]* no_of_nodes */
+           /* 3 cache lines can get v6 hdr + trace option with upto 9 node trace */
+           CLIB_PREFETCH (p2->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD);
+           CLIB_PREFETCH (p3->data, 3 * CLIB_CACHE_LINE_BYTES, LOAD);
+         }
+
+         /* speculatively enqueue p0 and p1 to the current next frame */
+         to_next[0] = bi0 = from[0];
+         to_next[1] = bi1 = from[1];
+         from += 2;
+         to_next += 2;
+         n_left_from -= 2;
+         n_left_to_next -= 2;
+
+         p0 = vlib_get_buffer (vm, bi0);
+         p1 = vlib_get_buffer (vm, bi1);
+
+         ip60 = vlib_buffer_get_current (p0);
+         ip61 = vlib_buffer_get_current (p1);
+
+         /* Total IPv6 packet length: payload length plus the fixed header */
+         ip_len0 =
+           clib_net_to_host_u16 (ip60->payload_length) +
+           sizeof (ip6_header_t);
+         ip_len1 =
+           clib_net_to_host_u16 (ip61->payload_length) +
+           sizeof (ip6_header_t);
+
+         ebi0 = my_buf->buffer_index;
+         eb0 = vlib_get_buffer (vm, ebi0);
+         if (PREDICT_FALSE (eb0 == 0))
+           goto NO_BUFFER1;
+
+         /* Never copy more than one record slot */
+         ip_len0 =
+           ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0;
+         ip_len1 =
+           ip_len1 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len1;
+
+         copy3cachelines (eb0->data + eb0->current_length, ip60, ip_len0);
+         eb0->current_length += DEFAULT_EXPORT_SIZE;
+         /* To maintain uniform size per export, each
+          * record is default size, ip6 hdr can be
+          * used to parse the record correctly
+          */
+         my_buf->records_in_this_buffer++;
+         /* if number of buf exceeds max that fits in a MTU sized buffer
+          * ship it to the queue and pick new one
+          */
+         if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS)
+           {
+             ioam_export_send_buffer (vm, my_buf);
+             /* NOTE(review): the result of ioam_export_init_buffer() is not
+              * checked here, although ioam_export_process tests it for == 1 */
+             ioam_export_init_buffer (vm, my_buf);
+           }
+
+         /* Re-read the buffer: it may have just been sent and re-initialised */
+         ebi0 = my_buf->buffer_index;
+         eb0 = vlib_get_buffer (vm, ebi0);
+         if (PREDICT_FALSE (eb0 == 0))
+           goto NO_BUFFER1;
+
+         copy3cachelines (eb0->data + eb0->current_length, ip61, ip_len1);
+         eb0->current_length += DEFAULT_EXPORT_SIZE;
+         my_buf->records_in_this_buffer++;
+         if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS)
+           {
+             ioam_export_send_buffer (vm, my_buf);
+             ioam_export_init_buffer (vm, my_buf);
+           }
+
+         pkts_recorded += 2;
+
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)))
+           {
+             if (p0->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 export_trace_t *t =
+                   vlib_add_trace (vm, node, p0, sizeof (*t));
+                 t->flow_label =
+                   clib_net_to_host_u32 (ip60->
+                                         ip_version_traffic_class_and_flow_label);
+                 t->next_index = next0;
+               }
+             if (p1->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 export_trace_t *t =
+                   vlib_add_trace (vm, node, p1, sizeof (*t));
+                 t->flow_label =
+                   clib_net_to_host_u32 (ip61->
+                                         ip_version_traffic_class_and_flow_label);
+                 t->next_index = next1;
+               }
+           }
+         /* Jumping here skips the counter update and tracing for this pair;
+          * both packets are still enqueued to their next node */
+       NO_BUFFER1:
+         /* verify speculative enqueues, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, bi1, next0, next1);
+       }
+
+      /* Single loop: remaining packets, one per iteration */
+      while (n_left_from > 0 && n_left_to_next > 0)
+       {
+         u32 bi0;
+         vlib_buffer_t *p0;
+         u32 next0 = EXPORT_NEXT_POP_HBYH;
+         ip6_header_t *ip60;
+         u32 ip_len0;
+
+         /* speculatively enqueue p0 to the current next frame */
+         bi0 = from[0];
+         to_next[0] = bi0;
+         from += 1;
+         to_next += 1;
+         n_left_from -= 1;
+         n_left_to_next -= 1;
+
+         p0 = vlib_get_buffer (vm, bi0);
+         ip60 = vlib_buffer_get_current (p0);
+         ip_len0 =
+           clib_net_to_host_u16 (ip60->payload_length) +
+           sizeof (ip6_header_t);
+
+         ebi0 = my_buf->buffer_index;
+         eb0 = vlib_get_buffer (vm, ebi0);
+         if (PREDICT_FALSE (eb0 == 0))
+           goto NO_BUFFER;
+
+         ip_len0 =
+           ip_len0 > DEFAULT_EXPORT_SIZE ? DEFAULT_EXPORT_SIZE : ip_len0;
+         copy3cachelines (eb0->data + eb0->current_length, ip60, ip_len0);
+         eb0->current_length += DEFAULT_EXPORT_SIZE;
+         /* To maintain uniform size per export, each
+          * record is default size, ip6 hdr can be
+          * used to parse the record correctly
+          */
+         my_buf->records_in_this_buffer++;
+         /* if number of buf exceeds max that fits in a MTU sized buffer
+          * ship it to the queue and pick new one
+          */
+         if (my_buf->records_in_this_buffer >= DEFAULT_EXPORT_RECORDS)
+           {
+             ioam_export_send_buffer (vm, my_buf);
+             ioam_export_init_buffer (vm, my_buf);
+           }
+         if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE)
+                            && (p0->flags & VLIB_BUFFER_IS_TRACED)))
+           {
+             export_trace_t *t = vlib_add_trace (vm, node, p0, sizeof (*t));
+             t->flow_label =
+               clib_net_to_host_u32 (ip60->
+                                     ip_version_traffic_class_and_flow_label);
+             t->next_index = next0;
+           }
+
+         pkts_recorded += 1;
+         /* Jumping here skips recording, tracing and the counter update */
+       NO_BUFFER:
+         /* verify speculative enqueue, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, next0);
+       }
+
+      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
+    }
+
+  vlib_node_increment_counter (vm, export_node.index,
+                              EXPORT_ERROR_RECORDED, pkts_recorded);
+  /* Release the lock taken at the top of the function */
+  *em->lockp[vm->cpu_index] = 0;
+  return frame->n_vectors;
+}
+
+/*
+ * Node for IP6 export: registers ip6_export_node_fn as the internal node
+ * "ip6-export", with format_export_trace as its trace formatter and
+ * "ip6-pop-hop-by-hop" as its only next node.
+ */
+VLIB_REGISTER_NODE (export_node) =
+{
+  .function = ip6_export_node_fn,
+  .name = "ip6-export",
+  .vector_size = sizeof (u32),
+  .format_trace = format_export_trace,
+  .type = VLIB_NODE_TYPE_INTERNAL,
+  .n_errors = ARRAY_LEN (export_error_strings),
+  .error_strings = export_error_strings,
+  .n_next_nodes = EXPORT_N_NEXT,
+  /* edit / add dispositions here */
+  .next_nodes =
+  {
+    [EXPORT_NEXT_POP_HBYH] = "ip6-pop-hop-by-hop"
+  },
+};
index 0d5d8d0..b43e2da 100644 (file)
@@ -578,6 +578,7 @@ typedef struct {
   /* Array of function pointers to HBH option handling routines */
   int (*options[256])(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt);
   u8 *(*trace[256])(u8 *s, ip6_hop_by_hop_option_t *opt);
+  uword next_override;
 } ip6_hop_by_hop_main_t;
 
 extern ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
@@ -586,6 +587,7 @@ int ip6_hbh_register_option (u8 option,
                             int options(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt),
                             u8 *trace(u8 *s, ip6_hop_by_hop_option_t *opt));
 int ip6_hbh_unregister_option (u8 option);
+void ip6_hbh_set_next_override (uword next);
 
 /* Flag used by IOAM code. Classifier sets it pop-hop-by-hop checks it */
 #define OI_DECAP   100
index f79acf7..723b090 100644 (file)
@@ -765,7 +765,7 @@ ip6_lookup_inline (vlib_main_t * vm,
          ip0 = vlib_buffer_get_current (p0);
          ip1 = vlib_buffer_get_current (p1);
 
-         if (is_indirect)
+         if (PREDICT_FALSE(is_indirect))
            {
              ip_adjacency_t * iadj0, * iadj1;
              iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
@@ -911,7 +911,7 @@ ip6_lookup_inline (vlib_main_t * vm,
 
          ip0 = vlib_buffer_get_current (p0);
 
-         if (is_indirect)
+         if (PREDICT_FALSE(is_indirect))
            {
              ip_adjacency_t * iadj0;
              iadj0 = ip_get_adjacency (lm, vnet_buffer(p0)->ip.adj_index[VLIB_TX]);
@@ -2813,6 +2813,76 @@ format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
   return s;
 }
 
+/*
+ * Walk the options of one hop-by-hop header, from opt0 up to limit0.
+ *
+ * Pad1 (type 0) is a single byte and is stepped over as such; every other
+ * option is stepped over using its length field.  For an option type with a
+ * handler in ip6_hop_by_hop_main.options[], the handler is called and a
+ * negative result ends the scan with IP6_HOP_BY_HOP_ERROR_FORMAT.  For an
+ * option type without a handler, the two high-order bits of the type select
+ * the action: *next0 may be rewritten to drop or ICMP error, and the scan
+ * ends.
+ *
+ * NOTE(review): an unhandled option of the "skip" class also ends the scan
+ * (returning 0) instead of continuing with the next option.
+ *
+ * Returns 0 or an IP6_HOP_BY_HOP_ERROR_* code.  hbh0 is not used here.
+ */
+always_inline u8 ip6_scan_hbh_options (
+                                      vlib_buffer_t * b0,
+                                      ip6_header_t *ip0,
+                                      ip6_hop_by_hop_header_t *hbh0,
+                                      ip6_hop_by_hop_option_t *opt0,
+                                      ip6_hop_by_hop_option_t *limit0,
+                                      u32 *next0)
+{
+  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  u8 type0;
+  u8 error0 = 0;
+
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      switch (type0)
+       {
+       case 0: /* Pad1 */
+         /* Advance by exactly one byte: add before casting back, otherwise
+          * the + 1 is scaled by sizeof (ip6_hop_by_hop_option_t) */
+         opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0 + 1);
+         continue;
+       case 1: /* PadN */
+         break;
+       default:
+         if (hm->options[type0])
+           {
+             if ((*hm->options[type0])(b0, ip0, opt0) < 0)
+               {
+                 error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+                 return(error0);
+               }
+           }
+         else
+           {
+             /* Unrecognized mandatory option, check the two high order bits */
+             switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS)
+               {
+               case HBH_OPTION_TYPE_SKIP_UNKNOWN:
+                 break;
+               case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
+                 error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+                 *next0 = IP_LOOKUP_NEXT_DROP;
+                 break;
+               case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
+                 error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+                 *next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
+                 icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
+                                             ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
+                 break;
+               case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
+                 error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
+                 if (!ip6_address_is_multicast(&ip0->dst_address))
+                   {
+                     *next0 =  IP_LOOKUP_NEXT_ICMP_ERROR;
+                     icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
+                                                 ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
+                   }
+                 else
+                   {
+                     *next0 =  IP_LOOKUP_NEXT_DROP;
+                   }
+                 break;
+               }
+             return(error0);
+           }
+       }
+      /* Step over the option header plus its data */
+      opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
+    }
+  return(error0);
+}
+
 /*
  * Process the Hop-by-Hop Options header
  */
@@ -2837,6 +2907,116 @@ ip6_hop_by_hop (vlib_main_t * vm,
 
     vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next);
 
+    /*
+     * Dual loop: handle two packets per iteration while prefetching the
+     * following two.  Each packet is validated and scanned independently,
+     * so a malformed first packet does not suppress processing of the
+     * second one.
+     */
+    while (n_left_from >= 4 && n_left_to_next >= 2) {
+      u32 bi0, bi1;
+      vlib_buffer_t * b0, *b1;
+      u32 next0, next1;
+      ip6_header_t * ip0, *ip1;
+      ip6_hop_by_hop_header_t *hbh0, *hbh1;
+      ip6_hop_by_hop_option_t *opt0, *limit0, *opt1, *limit1;
+      u8 error0 = 0, error1 = 0;
+
+      /* Prefetch next iteration. */
+      {
+       vlib_buffer_t * p2, * p3;
+
+       p2 = vlib_get_buffer (vm, from[2]);
+       p3 = vlib_get_buffer (vm, from[3]);
+
+       vlib_prefetch_buffer_header (p2, LOAD);
+       vlib_prefetch_buffer_header (p3, LOAD);
+
+       CLIB_PREFETCH (p2->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+       CLIB_PREFETCH (p3->data, 2*CLIB_CACHE_LINE_BYTES, LOAD);
+      }
+
+      /* Speculatively enqueue b0, b1 to the current next frame */
+      to_next[0] = bi0 = from[0];
+      to_next[1] = bi1 = from[1];
+      from += 2;
+      to_next += 2;
+      n_left_from -= 2;
+      n_left_to_next -= 2;
+
+      b0 = vlib_get_buffer (vm, bi0);
+      b1 = vlib_get_buffer (vm, bi1);
+      u32 adj_index0 = vnet_buffer(b0)->ip.adj_index[VLIB_TX];
+      ip_adjacency_t *adj0 = ip_get_adjacency(lm, adj_index0);
+      u32 adj_index1 = vnet_buffer(b1)->ip.adj_index[VLIB_TX];
+      ip_adjacency_t *adj1 = ip_get_adjacency(lm, adj_index1);
+
+      /* Default use the next_index from the adjacency. A HBH option rarely redirects to a different node */
+      next0 = adj0->lookup_next_index;
+      next1 = adj1->lookup_next_index;
+
+      ip0 = vlib_buffer_get_current (b0);
+      ip1 = vlib_buffer_get_current (b1);
+      hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
+      hbh1 = (ip6_hop_by_hop_header_t *)(ip1+1);
+      opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
+      opt1 = (ip6_hop_by_hop_option_t *)(hbh1+1);
+      /* The header length field counts 8-byte units beyond the first 8 bytes */
+      limit0 = (ip6_hop_by_hop_option_t *)((u8 *)hbh0 + ((hbh0->length + 1) << 3));
+      limit1 = (ip6_hop_by_hop_option_t *)((u8 *)hbh1 + ((hbh1->length + 1) << 3));
+
+      /*
+       * Basic validity checks, then scan the set of h-b-h options and
+       * process the ones that we understand.  Done per packet: a header
+       * longer than the IPv6 payload drops only that packet.
+       */
+      if ((hbh0->length + 1) << 3 > clib_net_to_host_u16(ip0->payload_length)) {
+       error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+       next0 = IP_LOOKUP_NEXT_DROP;
+      } else {
+       error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
+      }
+
+      if ((hbh1->length + 1) << 3 > clib_net_to_host_u16(ip1->payload_length)) {
+       error1 = IP6_HOP_BY_HOP_ERROR_FORMAT;
+       next1 = IP_LOOKUP_NEXT_DROP;
+      } else {
+       error1 = ip6_scan_hbh_options(b1,ip1,hbh1,opt1,limit1, &next1);
+      }
+
+      /* Has the classifier flagged this buffer for special treatment? */
+      if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP))
+       next0 = hm->next_override;
+
+      /* Has the classifier flagged this buffer for special treatment? */
+      if ((error1 == 0) && (vnet_buffer(b1)->l2_classify.opaque_index == OI_DECAP))
+       next1 = hm->next_override;
+
+      if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
+       {
+         if (b0->flags & VLIB_BUFFER_IS_TRACED) {
+           ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
+           u32 trace_len = (hbh0->length + 1) << 3;
+           t->next_index = next0;
+           /* Capture the h-b-h option verbatim */
+           trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
+           t->trace_len = trace_len;
+           clib_memcpy(t->option_data, hbh0, trace_len);
+         }
+         if (b1->flags & VLIB_BUFFER_IS_TRACED) {
+           ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b1, sizeof (*t));
+           u32 trace_len = (hbh1->length + 1) << 3;
+           t->next_index = next1;
+           /* Capture the h-b-h option verbatim */
+           trace_len = trace_len < ARRAY_LEN(t->option_data) ? trace_len : ARRAY_LEN(t->option_data);
+           t->trace_len = trace_len;
+           clib_memcpy(t->option_data, hbh1, trace_len);
+         }
+
+       }
+
+      b0->error = error_node->errors[error0];
+      b1->error = error_node->errors[error1];
+
+      /* verify speculative enqueue, maybe switch current next frame */
+      vlib_validate_buffer_enqueue_x2 (vm, node, next_index, to_next, n_left_to_next, bi0,
+                                      bi1,next0, next1);
+    }
+
     while (n_left_from > 0 && n_left_to_next > 0) {
       u32 bi0;
       vlib_buffer_t * b0;
@@ -2844,7 +3024,6 @@ ip6_hop_by_hop (vlib_main_t * vm,
       ip6_header_t * ip0;
       ip6_hop_by_hop_header_t *hbh0;
       ip6_hop_by_hop_option_t *opt0, *limit0;
-      u8 type0;
       u8 error0 = 0;
 
       /* Speculatively enqueue b0 to the current next frame */
@@ -2876,54 +3055,12 @@ ip6_hop_by_hop (vlib_main_t * vm,
       }
 
       /* Scan the set of h-b-h options, process ones that we understand */
-      while (opt0 < limit0) {
-       type0 = opt0->type;
-       switch (type0) {
-       case 0: /* Pad1 */
-         opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
-         continue;
-       case 1: /* PadN */
-         break;
-       default:
-         if (hm->options[type0]) {
-           if ((*hm->options[type0])(b0, ip0, opt0) < 0) {
-             error0 = IP6_HOP_BY_HOP_ERROR_FORMAT;
-             goto out0;
-           }
-         } else {
-           /* Unrecognized mandatory option, check the two high order bits */
-           switch (opt0->type & HBH_OPTION_TYPE_HIGH_ORDER_BITS) {
-           case HBH_OPTION_TYPE_SKIP_UNKNOWN:
-             break;
-           case HBH_OPTION_TYPE_DISCARD_UNKNOWN:
-             next0 = IP_LOOKUP_NEXT_DROP;
-             break;
-           case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP:
-             next0 = IP_LOOKUP_NEXT_ICMP_ERROR;
-             icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
-                                         ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
-             break;
-           case HBH_OPTION_TYPE_DISCARD_UNKNOWN_ICMP_NOT_MCAST:
-             if (!ip6_address_is_multicast(&ip0->dst_address)) {
-               next0 =  IP_LOOKUP_NEXT_ICMP_ERROR;
-               icmp6_error_set_vnet_buffer(b0, ICMP6_parameter_problem,
-                                           ICMP6_parameter_problem_unrecognized_option, (u8 *)opt0 - (u8 *)ip0);
-             } else {
-               next0 =  IP_LOOKUP_NEXT_DROP;
-             }
-             break;
-           }
-           error0 = IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION;
-           goto out0;
-         }
-       }
-       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
-      }
+      error0 = ip6_scan_hbh_options(b0, ip0, hbh0, opt0, limit0, &next0);
 
     out0:
       /* Has the classifier flagged this buffer for special treatment? */
       if ((error0 == 0) && (vnet_buffer(b0)->l2_classify.opaque_index == OI_DECAP))
-       next0 = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
+       next0 = hm->next_override;
 
       if (PREDICT_FALSE(b0->flags & VLIB_BUFFER_IS_TRACED)) {
        ip6_hop_by_hop_trace_t *t = vlib_add_trace(vm, node, b0, sizeof (*t));
@@ -2965,12 +3102,19 @@ ip6_hop_by_hop_init (vlib_main_t * vm)
   ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
   memset(hm->options, 0, sizeof(hm->options));
   memset(hm->trace, 0, sizeof(hm->trace));
-
+  hm->next_override = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
   return (0);
 }
 
 VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
 
+/*
+ * Store the next index that ip6_hop_by_hop assigns to error-free packets
+ * whose l2_classify.opaque_index equals OI_DECAP.
+ */
+void ip6_hbh_set_next_override (uword next)
+{
+  ip6_hop_by_hop_main.next_override = next;
+}
+
 int
 ip6_hbh_register_option (u8 option,
                         int options(vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt),
index c9b1515..7038556 100644 (file)
@@ -285,80 +285,114 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
 
       vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);
-
-#if 0
       while (n_left_from >= 4 && n_left_to_next >= 2)
-       {
-          u32 next0 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
-          u32 next1 = IP6_ADD_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
-          u32 sw_if_index0, sw_if_index1;
-          u8 tmp0[6], tmp1[6];
-          ethernet_header_t *en0, *en1;
+        {
           u32 bi0, bi1;
-         vlib_buffer_t * b0, * b1;
-          
-         /* Prefetch next iteration. */
-         {
-           vlib_buffer_t * p2, * p3;
-            
-           p2 = vlib_get_buffer (vm, from[2]);
-           p3 = vlib_get_buffer (vm, from[3]);
-            
-           vlib_prefetch_buffer_header (p2, LOAD);
-           vlib_prefetch_buffer_header (p3, LOAD);
+          vlib_buffer_t * b0, *b1;
+          u32 next0, next1;
+          ip6_header_t * ip0, *ip1;
+          ip6_hop_by_hop_header_t * hbh0, *hbh1;
+          u64 * copy_src0, * copy_dst0, *copy_src1, *copy_dst1;
+          u16 new_l0, new_l1;
 
-           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-           CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
-         }
+          /* Prefetch next iteration. */
+          {
+            vlib_buffer_t * p2, * p3;
+
+            p2 = vlib_get_buffer (vm, from[2]);
+            p3 = vlib_get_buffer (vm, from[3]);
+
+            vlib_prefetch_buffer_header (p2, LOAD);
+            vlib_prefetch_buffer_header (p3, LOAD);
+
+            CLIB_PREFETCH (p2->data - rewrite_length, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+            CLIB_PREFETCH (p3->data - rewrite_length, 2 * CLIB_CACHE_LINE_BYTES, STORE);
+          }
 
           /* speculatively enqueue b0 and b1 to the current next frame */
-         to_next[0] = bi0 = from[0];
-         to_next[1] = bi1 = from[1];
-         from += 2;
-         to_next += 2;
-         n_left_from -= 2;
-         n_left_to_next -= 2;
+          to_next[0] = bi0 = from[0];
+          to_next[1] = bi1 = from[1];
+          from += 2;
+          to_next += 2;
+          n_left_from -= 2;
+          n_left_to_next -= 2;
 
-         b0 = vlib_get_buffer (vm, bi0);
-         b1 = vlib_get_buffer (vm, bi1);
+          b0 = vlib_get_buffer (vm, bi0);
+          b1 = vlib_get_buffer (vm, bi1);
 
           /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
           ASSERT (b0->current_data == 0);
           ASSERT (b1->current_data == 0);
-          
+
+          ip0 = vlib_buffer_get_current (b0);
+          ip1 = vlib_buffer_get_current (b1);
+
+          /* Copy the ip header left by the required amount */
+          copy_dst0 = (u64 *)(((u8 *)ip0) - rewrite_length);
+          copy_dst1 = (u64 *)(((u8 *)ip1) - rewrite_length);
+          copy_src0 = (u64 *) ip0;
+          copy_src1 = (u64 *) ip1;
+
+          copy_dst0 [0] = copy_src0 [0];
+          copy_dst0 [1] = copy_src0 [1];
+          copy_dst0 [2] = copy_src0 [2];
+          copy_dst0 [3] = copy_src0 [3];
+          copy_dst0 [4] = copy_src0 [4];
+
+          copy_dst1 [0] = copy_src1 [0];
+          copy_dst1 [1] = copy_src1 [1];
+          copy_dst1 [2] = copy_src1 [2];
+          copy_dst1 [3] = copy_src1 [3];
+          copy_dst1 [4] = copy_src1 [4];
+
+          vlib_buffer_advance (b0, - (word)rewrite_length);
+          vlib_buffer_advance (b1, - (word)rewrite_length);
           ip0 = vlib_buffer_get_current (b0);
-          ip1 = vlib_buffer_get_current (b0);
+          ip1 = vlib_buffer_get_current (b1);
+
+          hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
+          hbh1 = (ip6_hop_by_hop_header_t *)(ip1 + 1);
+          /* $$$ tune, rewrite_length is a multiple of 8 */
+          clib_memcpy (hbh0, rewrite, rewrite_length);
+          clib_memcpy (hbh1, rewrite, rewrite_length);
+          /* Patch the protocol chain, insert the h-b-h (type 0) header */
+          hbh0->protocol = ip0->protocol;
+          hbh1->protocol = ip1->protocol;
+          ip0->protocol = 0;
+          ip1->protocol = 0;
+          new_l0 = clib_net_to_host_u16 (ip0->payload_length) + rewrite_length;
+          new_l1 = clib_net_to_host_u16 (ip1->payload_length) + rewrite_length;
+          ip0->payload_length = clib_host_to_net_u16 (new_l0);
+          ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+          /* Populate the (first) h-b-h list elt */
+          next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
+          next1 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
 
-          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
 
           /* $$$$$ End of processing 2 x packets $$$$$ */
 
           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
             {
-              if (b0->flags & VLIB_BUFFER_IS_TRACED) 
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                 {
-                    ip6_add_hop_by_hop_trace_t *t = 
-                      vlib_add_trace (vm, node, b0, sizeof (*t));
-                    t->sw_if_index = sw_if_index0;
-                    t->next_index = next0;
-                  }
-                if (b1->flags & VLIB_BUFFER_IS_TRACED) 
-                  {
-                    ip6_add_hop_by_hop_trace_t *t = 
-                      vlib_add_trace (vm, node, b1, sizeof (*t));
-                    t->sw_if_index = sw_if_index1;
-                    t->next_index = next1;
-                  }
-              }
-            
-            /* verify speculative enqueues, maybe switch current next frame */
-            vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                             to_next, n_left_to_next,
-                                             bi0, bi1, next0, next1);
+                 ip6_add_hop_by_hop_trace_t *t =
+                   vlib_add_trace (vm, node, b0, sizeof (*t));
+                 t->next_index = next0;
+               }
+             if (b1->flags & VLIB_BUFFER_IS_TRACED)
+               {
+                 ip6_add_hop_by_hop_trace_t *t =
+                   vlib_add_trace (vm, node, b1, sizeof (*t));
+                 t->next_index = next1;
+               }
+           }
+         processed+=2;
+         /* verify speculative enqueues, maybe switch current next frame */
+         vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
+                                          to_next, n_left_to_next,
+                                          bi0, bi1, next0, next1);
         }
-#endif
-
       while (n_left_from > 0 && n_left_to_next > 0)
        {
           u32 bi0;
@@ -581,24 +615,25 @@ ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
       vlib_get_next_frame (vm, node, next_index,
                           to_next, n_left_to_next);
 
-#if 0
       while (n_left_from >= 4 && n_left_to_next >= 2)
        {
-          u32 next0 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
-          u32 next1 = IP6_POP_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
-          u32 sw_if_index0, sw_if_index1;
-          u8 tmp0[6], tmp1[6];
-          ethernet_header_t *en0, *en1;
           u32 bi0, bi1;
          vlib_buffer_t * b0, * b1;
-          
+         u32 next0, next1;
+         u32 adj_index0, adj_index1;
+         ip6_header_t * ip0, *ip1;
+         ip_adjacency_t * adj0, *adj1;
+         ip6_hop_by_hop_header_t *hbh0, *hbh1;
+         u64 *copy_dst0, *copy_src0, *copy_dst1, *copy_src1;
+         u16 new_l0, new_l1;
+
          /* Prefetch next iteration. */
          {
            vlib_buffer_t * p2, * p3;
-            
+
            p2 = vlib_get_buffer (vm, from[2]);
            p3 = vlib_get_buffer (vm, from[3]);
-            
+
            vlib_prefetch_buffer_header (p2, LOAD);
            vlib_prefetch_buffer_header (p3, LOAD);
 
@@ -620,39 +655,75 @@ ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
           /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
           ASSERT (b0->current_data == 0);
           ASSERT (b1->current_data == 0);
-          
+
           ip0 = vlib_buffer_get_current (b0);
-          ip1 = vlib_buffer_get_current (b0);
+          ip1 = vlib_buffer_get_current (b1);
+         adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
+         adj_index1 = vnet_buffer (b1)->ip.adj_index[VLIB_TX];
+         adj0 = ip_get_adjacency (lm, adj_index0);
+         adj1 = ip_get_adjacency (lm, adj_index1);
+
+         next0 = adj0->lookup_next_index;
+         next1 = adj1->lookup_next_index;
+
+         hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
+         hbh1 = (ip6_hop_by_hop_header_t *)(ip1+1);
+
+         ioam_pop_hop_by_hop_processing(vm, ip0, hbh0);
+         ioam_pop_hop_by_hop_processing(vm, ip1, hbh1);
+
+         vlib_buffer_advance (b0, (hbh0->length+1)<<3);
+         vlib_buffer_advance (b1, (hbh1->length+1)<<3);
+
+         new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+           ((hbh0->length+1)<<3);
+         new_l1 = clib_net_to_host_u16 (ip1->payload_length) -
+           ((hbh1->length+1)<<3);
 
-          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
+         ip0->payload_length = clib_host_to_net_u16 (new_l0);
+         ip1->payload_length = clib_host_to_net_u16 (new_l1);
+
+         ip0->protocol = hbh0->protocol;
+         ip1->protocol = hbh1->protocol;
 
+         copy_src0 = (u64 *)ip0;
+         copy_src1 = (u64 *)ip1;
+         copy_dst0 = copy_src0 + (hbh0->length+1);
+         copy_dst0 [4] = copy_src0[4];
+         copy_dst0 [3] = copy_src0[3];
+         copy_dst0 [2] = copy_src0[2];
+         copy_dst0 [1] = copy_src0[1];
+         copy_dst0 [0] = copy_src0[0];
+         copy_dst1 = copy_src1 + (hbh1->length+1);
+         copy_dst1 [4] = copy_src1[4];
+         copy_dst1 [3] = copy_src1[3];
+         copy_dst1 [2] = copy_src1[2];
+         copy_dst1 [1] = copy_src1[1];
+         copy_dst1 [0] = copy_src1[0];
+         processed+=2;
           /* $$$$$ End of processing 2 x packets $$$$$ */
 
           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
             {
-              if (b0->flags & VLIB_BUFFER_IS_TRACED) 
+              if (b0->flags & VLIB_BUFFER_IS_TRACED)
                 {
-                    ip6_pop_hop_by_hop_trace_t *t = 
+                    ip6_pop_hop_by_hop_trace_t *t =
                       vlib_add_trace (vm, node, b0, sizeof (*t));
-                    t->sw_if_index = sw_if_index0;
                     t->next_index = next0;
                   }
-                if (b1->flags & VLIB_BUFFER_IS_TRACED) 
+                if (b1->flags & VLIB_BUFFER_IS_TRACED)
                   {
-                    ip6_pop_hop_by_hop_trace_t *t = 
+                    ip6_pop_hop_by_hop_trace_t *t =
                       vlib_add_trace (vm, node, b1, sizeof (*t));
-                    t->sw_if_index = sw_if_index1;
                     t->next_index = next1;
                   }
               }
-            
+
             /* verify speculative enqueues, maybe switch current next frame */
             vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
                                              to_next, n_left_to_next,
                                              bi0, bi1, next0, next1);
         }
-#endif
 
       while (n_left_from > 0 && n_left_to_next > 0)
        {