VPP-48 Fixes for ip4/6 ttl checks and icmp responses
[vpp.git] / vnet / vnet / ip / ip6_hop_by_hop.c
index 50dc38b..f6e10f0 100644 (file)
@@ -32,48 +32,19 @@ static f64 trace_tsp_mul[4] = {1, 1e3, 1e6, 1e9};
 
 char *ppc_state[] = {"None", "Encap", "Decap"};
 
-ip6_hop_by_hop_main_t ip6_hop_by_hop_main;
+ip6_hop_by_hop_ioam_main_t ip6_hop_by_hop_ioam_main;
 
-/*
- * ip6 hop-by-hop option handling. We push pkts with h-b-h options to
- * ip6_hop_by_hop_node_fn from ip6-lookup at a cost of ~2 clocks/pkt in
- * the speed path.
- * 
- * We parse through the h-b-h option TLVs, specifically looking for
- * HBH_OPTION_TYPE_IOAM_DATA_LIST. [Someone needs to get bananas from
- * IANA, aka to actually allocate the option TLV codes.]
- * 
- * If we find the indicated option type, and we have remaining list
- * elements in the trace list, allocate and populate the trace list
- * element. 
- *
- * At the ingress edge: punch in the h-b-h rewrite, then visit the
- * standard h-b-h option handler. We have to be careful in the standard 
- * h-b-h handler, to avoid looping until we run out of rewrite space.
- * Ask me how I know that.
- * 
- * Remaining work:
- *  decide on egress point "pop and count" scheme
- *  time stamp handling: usec since the top of the hour?
- *  configure the node id
- *  trace list application data support
- *  cons up analysis / steering plug-in(s)
- *  add configuration binary APIs, vpp_api_test_support, yang models and
- *  orca code
- *  perf tune: dual loop, replace memcpy w/ N x 8-byte load/stores
- *  
- */
+#define foreach_ip6_hbyh_ioam_input_next       \
+  _(IP6_REWRITE, "ip6-rewrite")                        \
+  _(IP6_LOOKUP, "ip6-lookup")                  \
+  _(DROP, "error-drop")                        
 
-/* 
- * primary h-b-h handler trace support
- * We work pretty hard on the problem for obvious reasons
- */
-typedef struct {
-  u32 next_index;
-  u32 trace_len;
-  u32 timestamp_msbs; /* Store the top set of bits of timestamp */
-  u8 option_data[256];
-} ip6_hop_by_hop_trace_t;
+typedef enum {
+#define _(s,n) IP6_HBYH_IOAM_INPUT_NEXT_##s,
+  foreach_ip6_hbyh_ioam_input_next
+#undef _
+  IP6_HBYH_IOAM_INPUT_N_NEXT,
+} ip6_hbyh_ioam_input_next_t;
 
 typedef union {
     u64 as_u64;
@@ -142,380 +113,121 @@ static u8 * format_ioam_data_list_element (u8 * s, va_list * args)
   return s;
 }
 
-static u8 * format_ip6_hop_by_hop_trace (u8 * s, va_list * args)
+u8 *
+ip6_hbh_ioam_trace_data_list_trace_handler (u8 *s, ip6_hop_by_hop_option_t *opt)
 {
-  CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *);
-  CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *);
-  ip6_hop_by_hop_trace_t * t = va_arg (*args, ip6_hop_by_hop_trace_t *);
-  ip6_hop_by_hop_header_t *hbh0;
-  ip6_hop_by_hop_option_t *opt0, *limit0;
-  ioam_trace_option_t * trace0;
+  ioam_trace_option_t *trace;
   u8 trace_data_size_in_words = 0;
-  u32 * elt0;
-  int elt_index;
-  u8 type0;
-  
-  hbh0 = (ip6_hop_by_hop_header_t *)t->option_data;
-
-  s = format (s, "IP6_HOP_BY_HOP: next index %d len %d traced %d\n",
-              t->next_index, (hbh0->length+1)<<3, t->trace_len);
-  
-  opt0 = (ip6_hop_by_hop_option_t *) (hbh0+1);
-  limit0 = (ip6_hop_by_hop_option_t *) ((u8 *)hbh0) + t->trace_len;
+  u32 *elt;
+  int elt_index = 0;
 
-  while (opt0 < limit0)
-    {
-      type0 = opt0->type & HBH_OPTION_TYPE_MASK;
-      elt_index = 0;
-      switch (type0)
-        {
-        case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
-          trace0 = (ioam_trace_option_t *)opt0;
-          s = format (s, "  Trace Type 0x%x , %d elts left ts msb(s) 0x%x\n", 
-                      trace0->ioam_trace_type, trace0->data_list_elts_left,
-                      t->timestamp_msbs);
-          trace_data_size_in_words = 
-            fetch_trace_data_size(trace0->ioam_trace_type)/4;
-          elt0 = &trace0->elts[0];
-          while ((u8 *) elt0 < 
-                 ((u8 *)(&trace0->elts[0]) + trace0->hdr.length - 2 
-                  /* -2 accounts for ioam_trace_type,elts_left */))
-            {
-              s = format (s, "    [%d] %U\n",elt_index,  
-                          format_ioam_data_list_element, 
-                          elt0, &trace0->ioam_trace_type);
-              elt_index++;
-              elt0 += trace_data_size_in_words;
-            }
-          
-          opt0 = (ip6_hop_by_hop_option_t *) 
-            (((u8 *)opt0) + opt0->length 
-             + sizeof (ip6_hop_by_hop_option_t));
-          break;
-
-        case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
-          s = format (s, "    POW opt present\n");
-          opt0 = (ip6_hop_by_hop_option_t *) 
-            (((u8 *)opt0) + sizeof (ioam_pow_option_t));
-          break;
-          
-        case 0: /* Pad, just stop */
-          opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
-          break;
-
-        default:
-          s = format (s, "Unknown %d", type0);
-          opt0 = (ip6_hop_by_hop_option_t *) 
-            (((u8 *)opt0) + opt0->length 
-             + sizeof (ip6_hop_by_hop_option_t));
-          break;
-        }
-    }
-  return s;
+  trace = (ioam_trace_option_t *)opt;
+#if 0
+  s = format (s, "  Trace Type 0x%x , %d elts left ts msb(s) 0x%x\n", trace->ioam_trace_type, trace->data_list_elts_left,
+             t->timestamp_msbs);
+#endif
+  s = format (s, "  Trace Type 0x%x , %d elts left\n", trace->ioam_trace_type, trace->data_list_elts_left);
+  trace_data_size_in_words = fetch_trace_data_size(trace->ioam_trace_type)/4;
+  elt = &trace->elts[0];
+  while ((u8 *) elt < ((u8 *)(&trace->elts[0]) + trace->hdr.length - 2
+                       /* -2 accounts for ioam_trace_type,elts_left */)) {
+    s = format (s, "    [%d] %U\n",elt_index,
+               format_ioam_data_list_element,
+               elt, &trace->ioam_trace_type);
+    elt_index++;
+    elt += trace_data_size_in_words;
+  }
+  return (s);
 }
 
-vlib_node_registration_t ip6_hop_by_hop_node;
-
-#define foreach_ip6_hop_by_hop_error \
-_(PROCESSED, "Pkts with ip6 hop-by-hop options") \
-_(UNKNOWN_OPTION, "Unknown ip6 hop-by-hop options")
-
-typedef enum {
-#define _(sym,str) IP6_HOP_BY_HOP_ERROR_##sym,
-  foreach_ip6_hop_by_hop_error
-#undef _
-  IP6_HOP_BY_HOP_N_ERROR,
-} ip6_hop_by_hop_error_t;
-
-static char * ip6_hop_by_hop_error_strings[] = {
-#define _(sym,string) string,
-  foreach_ip6_hop_by_hop_error
-#undef _
-};
-
-static uword
-ip6_hop_by_hop_node_fn (vlib_main_t * vm,
-                 vlib_node_runtime_t * node,
-                 vlib_frame_t * frame)
+int
+ip6_hbh_ioam_trace_data_list_handler (vlib_buffer_t *b, ip6_header_t *ip, ip6_hop_by_hop_option_t *opt)
 {
   ip6_main_t * im = &ip6_main;
   ip_lookup_main_t * lm = &im->lookup_main;
-  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
-  u32 n_left_from, * from, * to_next;
-  ip_lookup_next_t next_index;
-  u32 processed = 0, unknown_opts = 0;
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
   u8 elt_index = 0;
+  ioam_trace_option_t *trace = (ioam_trace_option_t *)opt;
+  u32 adj_index = vnet_buffer (b)->ip.adj_index[VLIB_TX];
+  ip_adjacency_t *adj = ip_get_adjacency (lm, adj_index);
   time_u64_t time_u64;
+  u32 *elt;
+  int rv = 0;
 
   time_u64.as_u64 = 0;
-  from = vlib_frame_vector_args (frame);
-  n_left_from = frame->n_vectors;
-  next_index = node->cached_next_index;
-
-  while (n_left_from > 0)
-    {
-      u32 n_left_to_next;
-
-      vlib_get_next_frame (vm, node, next_index,
-                          to_next, n_left_to_next);
 
-#if 0 /* $$$ DUAL-LOOP ME */
-      while (n_left_from >= 4 && n_left_to_next >= 2)
-       {
-          u32 next0 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
-          u32 next1 = IP6_HOP_BY_HOP_NEXT_INTERFACE_OUTPUT;
-          u32 sw_if_index0, sw_if_index1;
-          u8 tmp0[6], tmp1[6];
-          ethernet_header_t *en0, *en1;
-          u32 bi0, bi1;
-         vlib_buffer_t * b0, * b1;
-          
-         /* Prefetch next iteration. */
-         {
-           vlib_buffer_t * p2, * p3;
-            
-           p2 = vlib_get_buffer (vm, from[2]);
-           p3 = vlib_get_buffer (vm, from[3]);
-            
-           vlib_prefetch_buffer_header (p2, LOAD);
-           vlib_prefetch_buffer_header (p3, LOAD);
-
-           CLIB_PREFETCH (p2->data, CLIB_CACHE_LINE_BYTES, STORE);
-           CLIB_PREFETCH (p3->data, CLIB_CACHE_LINE_BYTES, STORE);
-         }
-
-          /* speculatively enqueue b0 and b1 to the current next frame */
-         to_next[0] = bi0 = from[0];
-         to_next[1] = bi1 = from[1];
-         from += 2;
-         to_next += 2;
-         n_left_from -= 2;
-         n_left_to_next -= 2;
-
-         b0 = vlib_get_buffer (vm, bi0);
-         b1 = vlib_get_buffer (vm, bi1);
-
-          /* $$$$$ Dual loop: process 2 x packets here $$$$$ */
-          ASSERT (b0->current_data == 0);
-          ASSERT (b1->current_data == 0);
-          
-          ip0 = vlib_buffer_get_current (b0);
-          ip1 = vlib_buffer_get_current (b0);
-
-          sw_if_index0 = vnet_buffer(b0)->sw_if_index[VLIB_RX];
-          sw_if_index1 = vnet_buffer(b1)->sw_if_index[VLIB_RX];
-
-          /* $$$$$ End of processing 2 x packets $$$$$ */
+  if (PREDICT_TRUE (trace->data_list_elts_left)) {
+    trace->data_list_elts_left--;
+    /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
+     * to skip to this node's location.
+     */
+    elt_index = trace->data_list_elts_left * fetch_trace_data_size(trace->ioam_trace_type) / 4;
+    elt = &trace->elts[elt_index];
+    if (trace->ioam_trace_type & BIT_TTL_NODEID) {
+      *elt = clib_host_to_net_u32 ((ip->hop_limit<<24) | hm->node_id);
+      elt++;
+    }
 
-          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE)))
-            {
-              if (b0->flags & VLIB_BUFFER_IS_TRACED) 
-                {
-                    ip6_hop_by_hop_trace_t *t = 
-                      vlib_add_trace (vm, node, b0, sizeof (*t));
-                    t->sw_if_index = sw_if_index0;
-                    t->next_index = next0;
-                  }
-                if (b1->flags & VLIB_BUFFER_IS_TRACED) 
-                  {
-                    ip6_hop_by_hop_trace_t *t = 
-                      vlib_add_trace (vm, node, b1, sizeof (*t));
-                    t->sw_if_index = sw_if_index1;
-                    t->next_index = next1;
-                  }
-              }
-            
-            /* verify speculative enqueues, maybe switch current next frame */
-            vlib_validate_buffer_enqueue_x2 (vm, node, next_index,
-                                             to_next, n_left_to_next,
-                                             bi0, bi1, next0, next1);
-        }
-#endif
+    if (trace->ioam_trace_type & BIT_ING_INTERFACE) {
+      *elt = (vnet_buffer(b)->sw_if_index[VLIB_RX]&0xFFFF) << 16 | (adj->rewrite_header.sw_if_index & 0xFFFF);
+      *elt = clib_host_to_net_u32(*elt);
+      elt++;
+    }
+                 
+    if (trace->ioam_trace_type & BIT_TIMESTAMP) {
+      /* Send least significant 32 bits */
+      f64 time_f64 = (f64)(((f64)hm->unix_time_0) + (vlib_time_now(hm->vlib_main) - hm->vlib_time_0));
 
-      while (n_left_from > 0 && n_left_to_next > 0)
-       {
-          u32 bi0;
-         vlib_buffer_t * b0;
-          u32 next0;
-          u32 adj_index0;
-          ip6_header_t * ip0;
-          ip_adjacency_t * adj0;
-          ip6_hop_by_hop_header_t *hbh0;
-          ip6_hop_by_hop_option_t *opt0, *limit0;
-          ioam_trace_option_t * trace0;
-          u32 * elt0;
-          u8 type0;
-         
-          /* speculatively enqueue b0 to the current next frame */
-         bi0 = from[0];
-         to_next[0] = bi0;
-         from += 1;
-         to_next += 1;
-         n_left_from -= 1;
-         n_left_to_next -= 1;
+      time_u64.as_u64 = time_f64 * trace_tsp_mul[hm->trace_tsp];
+      *elt = clib_host_to_net_u32(time_u64.as_u32[0]);
+      elt++;
+    }
 
-         b0 = vlib_get_buffer (vm, bi0);
+    if (trace->ioam_trace_type & BIT_APPDATA) {
+      /* $$$ set elt0->app_data */
+      *elt = clib_host_to_net_u32(hm->app_data);
+      elt++;
+    }
+  }
+  return (rv);
+}
 
-          ip0 = vlib_buffer_get_current (b0);
-          adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
-          adj0 = ip_get_adjacency (lm, adj_index0);
-          hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
-          opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
-          limit0 = (ip6_hop_by_hop_option_t *)
-            ((u8 *)hbh0 + ((hbh0->length+1)<<3));
-          
-          /* Scan the set of h-b-h options, process ones that we understand */
-          while (opt0 < limit0)
-            {
-              type0 = opt0->type & HBH_OPTION_TYPE_MASK;
-              switch (type0)
-                {
-                case HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST:
-                  trace0 = (ioam_trace_option_t *)opt0;
-                  if (PREDICT_TRUE (trace0->data_list_elts_left))
-                    {
-                      trace0->data_list_elts_left--;
-                      /* fetch_trace_data_size returns in bytes. Convert it to 4-bytes
-                       * to skip to this node's location.
-                       */
-                      elt_index = trace0->data_list_elts_left *
-                                  fetch_trace_data_size(trace0->ioam_trace_type)/4;
-                      elt0 = &trace0->elts[elt_index];
-                      if (trace0->ioam_trace_type & BIT_TTL_NODEID) 
-                        {
-                          *elt0 = 
-                            clib_host_to_net_u32 ((ip0->hop_limit<<24) 
-                                              | hm->node_id);
-                          elt0++;
-                        }
-
-                      if (trace0->ioam_trace_type & BIT_ING_INTERFACE) 
-                        {
-                          *elt0 =
-                          (vnet_buffer(b0)->sw_if_index[VLIB_RX]&0xFFFF) << 16 |                           (adj0->rewrite_header.sw_if_index & 0xFFFF);
-                          *elt0 = clib_host_to_net_u32(*elt0);
-                          elt0++;
-                        }
-                 
-                      if (trace0->ioam_trace_type & BIT_TIMESTAMP)
-                        {
-                            /* Send least significant 32 bits */
-                            f64 time_f64 = (f64)(((f64)hm->unix_time_0) +
-                              (vlib_time_now(hm->vlib_main) - hm->vlib_time_0));
-
-                            time_u64.as_u64 = 
-                               time_f64 * trace_tsp_mul[hm->trace_tsp];
-                            *elt0 = clib_host_to_net_u32(time_u64.as_u32[0]);
-                            elt0++;
-                        }
-
-                      if (trace0->ioam_trace_type & BIT_APPDATA)
-                        {
-                          /* $$$ set elt0->app_data */
-                          *elt0 = clib_host_to_net_u32(hm->app_data);
-                          elt0++;
-                        }
-                    }
-
-                  opt0 = (ip6_hop_by_hop_option_t *) 
-                    (((u8 *)opt0) + opt0->length 
-                     + sizeof (ip6_hop_by_hop_option_t));
-                  break;
-
-                case HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK:
-                  opt0 = (ip6_hop_by_hop_option_t *) 
-                    (((u8 *)opt0) + sizeof (ioam_pow_option_t));
-                  break;
-
-                case 0: /* Pad */
-                  opt0 = (ip6_hop_by_hop_option_t *) ((u8 *)opt0) + 1;
-                  goto out0;
-
-                default:
-                  opt0 = (ip6_hop_by_hop_option_t *)
-                  (((u8 *)opt0) + opt0->length
-                  + sizeof (ip6_hop_by_hop_option_t));
-                  unknown_opts++;
-                  break;
-                }
-            }
+/* The main h-b-h tracer will be invoked, no need to do much here */
+int
+ip6_hbh_add_register_option (u8 option,
+                            u8 size,
+                            int rewrite_options(u8 *rewrite_string, u8 rewrite_size))
+{
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
 
-        out0:
+  ASSERT (option < ARRAY_LEN (hm->add_options));
 
-          /* 
-           * Since we push pkts here from the h-b-h header imposition code
-           * we have to be careful what we wish for...
-           */
-          next0 = adj0->lookup_next_index != IP_LOOKUP_NEXT_ADD_HOP_BY_HOP ?
-              adj0->lookup_next_index : adj0->saved_lookup_next_index;
+  /* Already registered */
+  if (hm->add_options[option])
+    return (-1);
 
-          if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
-                            && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
-            {
-              ip6_hop_by_hop_trace_t *t = 
-                 vlib_add_trace (vm, node, b0, sizeof (*t));
-              u32 trace_len = (hbh0->length+1)<<3;
-              t->next_index = next0;
-              /* Capture the h-b-h option verbatim */
-              trace_len = trace_len < ARRAY_LEN(t->option_data) ? 
-                trace_len : ARRAY_LEN(t->option_data);
-              t->trace_len = trace_len;
-              t->timestamp_msbs = time_u64.as_u32[1];
-              memcpy (t->option_data, hbh0, trace_len);
-            }
-            
-          processed++;
+  hm->add_options[option] = rewrite_options;
+  hm->options_size[option] = size;
+  
+  return (0);
+}
 
-          /* verify speculative enqueue, maybe switch current next frame */
-         vlib_validate_buffer_enqueue_x1 (vm, node, next_index,
-                                          to_next, n_left_to_next,
-                                          bi0, next0);
-       }
+int
+ip6_hbh_add_unregister_option (u8 option)
+{
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
 
-      vlib_put_next_frame (vm, node, next_index, n_left_to_next);
-    }
+  ASSERT (option < ARRAY_LEN (hm->add_options));
 
-    if (PREDICT_FALSE(unknown_opts > 0)) {
-      vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index,
-                                   IP6_HOP_BY_HOP_ERROR_UNKNOWN_OPTION, unknown_opts);
-    }
+  /* Not registered */
+  if (!hm->add_options[option])
+    return (-1);
 
-  vlib_node_increment_counter (vm, ip6_hop_by_hop_node.index, 
-                               IP6_HOP_BY_HOP_ERROR_PROCESSED, processed);
-  return frame->n_vectors;
+  hm->add_options[option] = NULL;
+  hm->options_size[option] = 0;
+  return (0);
 }
 
-VLIB_REGISTER_NODE (ip6_hop_by_hop_node) = {
-  .function = ip6_hop_by_hop_node_fn,
-  .name = "ip6-hop-by-hop",
-  .vector_size = sizeof (u32),
-  .format_trace = format_ip6_hop_by_hop_trace,
-  .type = VLIB_NODE_TYPE_INTERNAL,
-  
-  .n_errors = ARRAY_LEN(ip6_hop_by_hop_error_strings),
-  .error_strings = ip6_hop_by_hop_error_strings,
-
-  /* See ip/lookup.h */
-  .n_next_nodes = IP_LOOKUP_N_NEXT,
-  .next_nodes = {
-    [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
-    [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
-    [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
-    [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
-    [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
-    [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
-    [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
-    [IP_LOOKUP_NEXT_MAP] = "ip6-map",
-    [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
-    [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
-    /* Next 3 arcs probably never used */
-    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
-    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", 
-    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", 
-  },
-};
-
-/* The main h-b-h tracer will be invoked, no need to do much here */
 typedef struct {
   u32 next_index;
 } ip6_add_hop_by_hop_trace_t;
@@ -556,7 +268,7 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
 {
-  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
   u32 n_left_from, * from, * to_next;
   ip_lookup_next_t next_index;
   u32 processed = 0;
@@ -683,7 +395,7 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
 
           hbh0 = (ip6_hop_by_hop_header_t *)(ip0 + 1);
           /* $$$ tune, rewrite_length is a multiple of 8 */
-          memcpy (hbh0, rewrite, rewrite_length);
+          clib_memcpy (hbh0, rewrite, rewrite_length);
           /* Patch the protocol chain, insert the h-b-h (type 0) header */
           hbh0->protocol = ip0->protocol;
           ip0->protocol = 0;
@@ -691,7 +403,7 @@ ip6_add_hop_by_hop_node_fn (vlib_main_t * vm,
           ip0->payload_length = clib_host_to_net_u16 (new_l0);
           
           /* Populate the (first) h-b-h list elt */
-          next0 = IP_LOOKUP_NEXT_HOP_BY_HOP;
+          next0 = IP6_HBYH_IOAM_INPUT_NEXT_IP6_LOOKUP;
 
           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
@@ -728,25 +440,15 @@ VLIB_REGISTER_NODE (ip6_add_hop_by_hop_node) = {
   .error_strings = ip6_add_hop_by_hop_error_strings,
 
   /* See ip/lookup.h */
-  .n_next_nodes = IP_LOOKUP_N_NEXT,
+  .n_next_nodes = IP6_HBYH_IOAM_INPUT_N_NEXT,
   .next_nodes = {
-    [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
-    [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
-    [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
-    [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
-    [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
-    [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
-    [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
-    [IP_LOOKUP_NEXT_MAP] = "ip6-map",
-    [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
-    [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
-    /* Next 3 arcs probably never used */
-    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
-    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", 
-    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", 
+#define _(s,n) [IP6_HBYH_IOAM_INPUT_NEXT_##s] = n,
+    foreach_ip6_hbyh_ioam_input_next
+#undef _
   },
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_add_hop_by_hop_node, ip6_add_hop_by_hop_node_fn)
 
 /* The main h-b-h tracer was already invoked, no need to do much here */
 typedef struct {
@@ -765,11 +467,44 @@ static u8 * format_ip6_pop_hop_by_hop_trace (u8 * s, va_list * args)
   return s;
 }
 
+/*
+ * Install a pop-side handler for one hop-by-hop option type.
+ *
+ * option  - option type; used as the index into hm->pop_options[]
+ * options - callback stored for that type
+ *
+ * Returns 0 when the callback was stored, -1 when the slot for this
+ * option type already holds a handler (the existing one is kept).
+ */
+int
+ip6_hbh_pop_register_option (u8 option,
+                            int options(ip6_header_t *ip, ip6_hop_by_hop_option_t *opt))
+{
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
+  int rv = -1;
+
+  ASSERT (option < ARRAY_LEN (hm->pop_options));
+
+  /* Claim the slot only while it is still empty */
+  if (hm->pop_options[option] == NULL)
+    {
+      hm->pop_options[option] = options;
+      rv = 0;
+    }
+
+  return (rv);
+}
+
+/*
+ * Remove the pop-side handler for one hop-by-hop option type.
+ *
+ * option - option type; used as the index into hm->pop_options[]
+ *
+ * Returns 0 when a handler was present and has been cleared, -1 when
+ * nothing was registered for this option type.
+ */
+int
+ip6_hbh_pop_unregister_option (u8 option)
+{
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
+  int rv = -1;
+
+  ASSERT (option < ARRAY_LEN (hm->pop_options));
+
+  /* Only an occupied slot can be released */
+  if (hm->pop_options[option] != NULL)
+    {
+      hm->pop_options[option] = NULL;
+      rv = 0;
+    }
+
+  return (rv);
+}
+
 vlib_node_registration_t ip6_pop_hop_by_hop_node;
 
 #define foreach_ip6_pop_hop_by_hop_error                \
 _(PROCESSED, "Pkts w/ removed ip6 hop-by-hop options")  \
-_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")
+_(NO_HOHO, "Pkts w/ no ip6 hop-by-hop options")         \
+_(OPTION_FAILED, "ip6 pop hop-by-hop failed to process")
 
 typedef enum {
 #define _(sym,str) IP6_POP_HOP_BY_HOP_ERROR_##sym,
@@ -784,23 +519,56 @@ static char * ip6_pop_hop_by_hop_error_strings[] = {
 #undef _
 };
 
+/*
+ * Walk the options of a hop-by-hop header and hand each one to the
+ * handler registered for its type in ip6_hop_by_hop_ioam_main.pop_options[].
+ *
+ * vm   - vlib main, used only to bump the node error counter
+ * ip0  - IPv6 header of the packet; passed through to the handlers
+ * hbh0 - hop-by-hop header whose options are scanned
+ *
+ * Returns nothing. Does nothing when ip0 or hbh0 is NULL. A handler that
+ * returns a negative value is counted as
+ * IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED against ip6_pop_hop_by_hop_node;
+ * the scan then carries on with the next option.
+ */
+static inline void ioam_pop_hop_by_hop_processing (vlib_main_t * vm,
+                                                ip6_header_t *ip0,
+                                                ip6_hop_by_hop_header_t *hbh0)
+{
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
+  ip6_hop_by_hop_option_t *opt0, *limit0;
+  u8 type0;
+
+  if (!hbh0 || !ip0) return;
+
+  /* First option sits directly behind the fixed h-b-h header */
+  opt0 = (ip6_hop_by_hop_option_t *)(hbh0+1);
+  /* Total header size in bytes is (length + 1) * 8 */
+  limit0 = (ip6_hop_by_hop_option_t *)
+    ((u8 *)hbh0 + ((hbh0->length+1)<<3));
+
+  /* Scan the set of h-b-h options, process ones that we understand */
+  while (opt0 < limit0)
+    {
+      type0 = opt0->type;
+      switch (type0)
+       {
+       case 0: /* Pad1 */
+         /*
+          * Pad1 is a lone type octet with no length field, so step
+          * exactly one byte. The cast must wrap the whole sum: casting
+          * first and then adding 1 advances by a full option header and
+          * skips the octet that follows the pad.
+          */
+         opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + 1);
+         continue;
+       case 1: /* PadN */
+         /* Nothing to process; skipped by its length below */
+         break;
+       default:
+         if (hm->pop_options[type0])
+           {
+             if ((*hm->pop_options[type0])(ip0, opt0) < 0)
+             {
+               vlib_node_increment_counter (vm, ip6_pop_hop_by_hop_node.index, 
+                               IP6_POP_HOP_BY_HOP_ERROR_OPTION_FAILED, 1);
+             }
+           }
+         break;
+       }
+       /* Advance past this option: type/length header plus its data */
+       opt0 = (ip6_hop_by_hop_option_t *) (((u8 *)opt0) + opt0->length + sizeof (ip6_hop_by_hop_option_t));
+    }
+}
+
 static uword
 ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
                  vlib_node_runtime_t * node,
                  vlib_frame_t * frame)
 {
-  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
   ip6_main_t * im = &ip6_main;
   ip_lookup_main_t * lm = &im->lookup_main;
   u32 n_left_from, * from, * to_next;
   ip_lookup_next_t next_index;
   u32 processed = 0;
   u32 no_header = 0;
-  u32 (*ioam_end_of_path_cb) (vlib_main_t *, vlib_node_runtime_t *,
-                              vlib_buffer_t *, ip6_header_t *, 
-                              ip_adjacency_t *);
-  
-  ioam_end_of_path_cb = hm->ioam_end_of_path_cb;
   
   from = vlib_frame_vector_args (frame);
   n_left_from = frame->n_vectors;
@@ -912,37 +680,28 @@ ip6_pop_hop_by_hop_node_fn (vlib_main_t * vm,
           adj_index0 = vnet_buffer (b0)->ip.adj_index[VLIB_TX];
           adj0 = ip_get_adjacency (lm, adj_index0);
 
+         /* Default use the next_index from the adjacency. */
+         next0 = adj0->lookup_next_index;
+
           /* Perfectly normal to end up here w/ out h-b-h header */
-          if (PREDICT_TRUE (ip0->protocol == 0))
-            {
-              hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
+         hbh0 = (ip6_hop_by_hop_header_t *)(ip0+1);
           
-              /* Collect data from trace via callback */
-              next0 = ioam_end_of_path_cb ? 
-                ioam_end_of_path_cb (vm, node, b0, ip0, adj0) 
-                : adj0->saved_lookup_next_index;
-              
-              
-              /* Pop the trace data */
-              vlib_buffer_advance (b0, (hbh0->length+1)<<3);
-              new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
-                ((hbh0->length+1)<<3);
-              ip0->payload_length = clib_host_to_net_u16 (new_l0);
-              ip0->protocol = hbh0->protocol;
-              copy_src0 = (u64 *)ip0;
-              copy_dst0 = copy_src0 + (hbh0->length+1);
-              copy_dst0 [4] = copy_src0[4];
-              copy_dst0 [3] = copy_src0[3];
-              copy_dst0 [2] = copy_src0[2];
-              copy_dst0 [1] = copy_src0[1];
-              copy_dst0 [0] = copy_src0[0];
-              processed++;
-            }
-          else
-            {
-              next0 = adj0->saved_lookup_next_index;
-              no_header++;
-            }
+         /* TODO:Temporarily doing it here.. do this validation in end_of_path_cb */
+         ioam_pop_hop_by_hop_processing(vm, ip0, hbh0);
+         /* Pop the trace data */
+         vlib_buffer_advance (b0, (hbh0->length+1)<<3);
+         new_l0 = clib_net_to_host_u16 (ip0->payload_length) -
+           ((hbh0->length+1)<<3);
+         ip0->payload_length = clib_host_to_net_u16 (new_l0);
+         ip0->protocol = hbh0->protocol;
+         copy_src0 = (u64 *)ip0;
+         copy_dst0 = copy_src0 + (hbh0->length+1);
+         copy_dst0 [4] = copy_src0[4];
+         copy_dst0 [3] = copy_src0[3];
+         copy_dst0 [2] = copy_src0[2];
+         copy_dst0 [1] = copy_src0[1];
+         copy_dst0 [0] = copy_src0[0];
+         processed++;
               
           if (PREDICT_FALSE((node->flags & VLIB_NODE_FLAG_TRACE) 
                             && (b0->flags & VLIB_BUFFER_IS_TRACED))) 
@@ -974,35 +733,21 @@ VLIB_REGISTER_NODE (ip6_pop_hop_by_hop_node) = {
   .vector_size = sizeof (u32),
   .format_trace = format_ip6_pop_hop_by_hop_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-  
+  .sibling_of = "ip6-lookup",
   .n_errors = ARRAY_LEN(ip6_pop_hop_by_hop_error_strings),
   .error_strings = ip6_pop_hop_by_hop_error_strings,
 
   /* See ip/lookup.h */
-  .n_next_nodes = IP_LOOKUP_N_NEXT,
-  .next_nodes = {
-    [IP_LOOKUP_NEXT_MISS] = "ip6-miss",
-    [IP_LOOKUP_NEXT_DROP] = "ip6-drop",
-    [IP_LOOKUP_NEXT_PUNT] = "ip6-punt",
-    [IP_LOOKUP_NEXT_LOCAL] = "ip6-local",
-    [IP_LOOKUP_NEXT_ARP] = "ip6-discover-neighbor",
-    [IP_LOOKUP_NEXT_REWRITE] = "ip6-rewrite",
-    [IP_LOOKUP_NEXT_CLASSIFY] = "ip6-classify",
-    [IP_LOOKUP_NEXT_MAP] = "ip6-map",
-    [IP_LOOKUP_NEXT_MAP_T] = "ip6-map-t",
-    [IP_LOOKUP_NEXT_SIXRD] = "ip6-sixrd",
-    /* Next 3 arcs probably never used */
-    [IP_LOOKUP_NEXT_HOP_BY_HOP] = "ip6-hop-by-hop",
-    [IP_LOOKUP_NEXT_ADD_HOP_BY_HOP] = "ip6-add-hop-by-hop", 
-    [IP_LOOKUP_NEXT_POP_HOP_BY_HOP] = "ip6-pop-hop-by-hop", 
-  },
+  .n_next_nodes = 0,
 };
 
+VLIB_NODE_FUNCTION_MULTIARCH (ip6_pop_hop_by_hop_node,
+                             ip6_pop_hop_by_hop_node_fn)
 
 static clib_error_t *
-ip6_hop_by_hop_init (vlib_main_t * vm)
+ip6_hop_by_hop_ioam_init (vlib_main_t * vm)
 {
-  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
 
   hm->vlib_main = vm;
   hm->vnet_main = vnet_get_main();
@@ -1010,26 +755,37 @@ ip6_hop_by_hop_init (vlib_main_t * vm)
   hm->vlib_time_0 = vlib_time_now (vm);
   hm->ioam_flag = IOAM_HBYH_MOD;
   hm->trace_tsp = TSP_MICROSECONDS; /* Micro seconds */
-
-  return 0;
+  memset(hm->add_options, 0, sizeof(hm->add_options));
+  memset(hm->pop_options, 0, sizeof(hm->pop_options));
+  memset(hm->options_size, 0, sizeof(hm->options_size));
+
+  /*
+   * Register the handlers
+   * XXX: This should be done dynamically based on OAM feature being enabled or not.
+   */
+  if (ip6_hbh_register_option(HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST, ip6_hbh_ioam_trace_data_list_handler,
+                             ip6_hbh_ioam_trace_data_list_trace_handler) < 0)
+    return (clib_error_create("registration of HBH_OPTION_TYPE_IOAM_TRACE_DATA_LIST failed"));
+
+  return (0);
 }
 
-VLIB_INIT_FUNCTION (ip6_hop_by_hop_init);
+VLIB_INIT_FUNCTION (ip6_hop_by_hop_ioam_init);
 
 int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts, 
-                          int has_pow_option, int has_ppc_option)
+                          int has_pot_option, int has_ppc_option)
 {
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;  
   u8 *rewrite = 0;
   u32 size, rnd_size;
   ip6_hop_by_hop_header_t *hbh;
   ioam_trace_option_t * trace_option;
-  ioam_pow_option_t * pow_option;
   u8 *current;
   u8 trace_data_size = 0;  
 
   vec_free (*rwp);
 
-  if (trace_option_elts == 0 && has_pow_option == 0)
+  if (trace_option_elts == 0 && has_pot_option == 0)
     return -1;
 
   /* Work out how much space we need */
@@ -1048,10 +804,10 @@ int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts,
   
       size += trace_option_elts * trace_data_size;
     }
-  if (has_pow_option)
+  if (has_pot_option && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
     {
       size += sizeof (ip6_hop_by_hop_option_t);
-      size += sizeof (ioam_pow_option_t);
+      size += hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT];
     }
 
   /* Round to a multiple of 8 octets */
@@ -1078,14 +834,11 @@ int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts,
       current += sizeof (ioam_trace_option_t) + 
         trace_option_elts * trace_data_size;
     }
-  if (has_pow_option)
+  if (has_pot_option && hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT] != 0)
     {
-      pow_option = (ioam_pow_option_t *)current;
-      pow_option->hdr.type = HBH_OPTION_TYPE_IOAM_PROOF_OF_WORK
-        | HBH_OPTION_TYPE_DATA_CHANGE_ENROUTE;
-      pow_option->hdr.length = sizeof (ioam_pow_option_t) - 
-        sizeof (ip6_hop_by_hop_option_t);
-      current += sizeof (ioam_pow_option_t);
+      if (0 == hm->add_options[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT](current,
+                                       hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]))
+         current += hm->options_size[HBH_OPTION_TYPE_IOAM_PROOF_OF_TRANSIT]; /* advance by the registered option size, not sizeof the table entry */
     }
   
   *rwp = rewrite;
@@ -1095,7 +848,7 @@ int ip6_ioam_set_rewrite (u8 **rwp, u32 trace_type, u32 trace_option_elts,
 clib_error_t *
 clear_ioam_rewrite_fn(void)
 {
-  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
 
   vec_free(hm->rewrite);
   hm->rewrite = 0;
@@ -1103,7 +856,7 @@ clear_ioam_rewrite_fn(void)
   hm->app_data = 0;
   hm->trace_type = 0;
   hm->trace_option_elts = 0;
-  hm->has_pow_option = 0;
+  hm->has_pot_option = 0;
   hm->has_ppc_option = 0;
   hm->trace_tsp = TSP_MICROSECONDS; 
 
@@ -1125,13 +878,13 @@ VLIB_CLI_COMMAND (ip6_clear_ioam_trace_cmd, static) = {
 
 clib_error_t *
 ip6_ioam_trace_profile_set(u32 trace_option_elts, u32 trace_type, u32 node_id,
-                           u32 app_data, int has_pow_option, u32 trace_tsp, 
+                           u32 app_data, int has_pot_option, u32 trace_tsp, 
                            int has_ppc_option)
 {
   int rv;
-  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
   rv = ip6_ioam_set_rewrite (&hm->rewrite, trace_type, trace_option_elts,
-                             has_pow_option, has_ppc_option);
+                             has_pot_option, has_ppc_option);
 
   switch (rv)
     {
@@ -1140,7 +893,7 @@ ip6_ioam_trace_profile_set(u32 trace_option_elts, u32 trace_type, u32 node_id,
       hm->app_data = app_data;
       hm->trace_type = trace_type;
       hm->trace_option_elts = trace_option_elts;
-      hm->has_pow_option = has_pow_option;
+      hm->has_pot_option = has_pot_option;
       hm->has_ppc_option = has_ppc_option;
       hm->trace_tsp = trace_tsp;
       break;
@@ -1161,7 +914,7 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
   u32 trace_option_elts = 0;
   u32 trace_type = 0, node_id = 0; 
   u32 app_data = 0, trace_tsp = TSP_MICROSECONDS;
-  int has_pow_option = 0;
+  int has_pot_option = 0;
   int has_ppc_option = 0;
   clib_error_t * rv = 0;
   
@@ -1172,8 +925,8 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
                       &trace_type, &trace_option_elts, &trace_tsp,
                       &node_id, &app_data))
             ;
-      else if (unformat (input, "pow"))
-        has_pow_option = 1;
+      else if (unformat (input, "pot"))
+        has_pot_option = 1;
       else if (unformat (input, "ppc encap"))
         has_ppc_option = PPC_ENCAP;
       else if (unformat (input, "ppc decap"))
@@ -1186,7 +939,7 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
   
     
     rv = ip6_ioam_trace_profile_set(trace_option_elts, trace_type, node_id,
-                           app_data, has_pow_option, trace_tsp, has_ppc_option);
+                           app_data, has_pot_option, trace_tsp, has_ppc_option);
 
     return rv;
 }
@@ -1194,7 +947,7 @@ ip6_set_ioam_rewrite_command_fn (vlib_main_t * vm,
 
 VLIB_CLI_COMMAND (ip6_set_ioam_rewrite_cmd, static) = {
   .path = "set ioam rewrite",
-  .short_help = "set ioam rewrite trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex> [pow] [ppc <encap|decap>]",
+  .short_help = "set ioam rewrite trace-type <0x1f|0x3|0x9|0x11|0x19> trace-elts <nn> trace-tsp <0|1|2|3> node-id <node id in hex> app-data <app_data in hex> [pot] [ppc <encap|decap>]",
   .function = ip6_set_ioam_rewrite_command_fn,
 };
   
@@ -1203,7 +956,7 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
                       unformat_input_t * input,
                       vlib_cli_command_t * cmd)
 {
-  ip6_hop_by_hop_main_t *hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t *hm = &ip6_hop_by_hop_ioam_main;
   u8 *s = 0;
 
 
@@ -1242,10 +995,10 @@ ip6_show_ioam_summary_cmd_fn (vlib_main_t * vm,
   s = format(s, " HOP BY HOP OPTIONS - TRACE CONFIG - Not configured\n");
   }
 
-  s = format(s, "                        POW OPTION - %d (%s)\n", 
-          hm->has_pow_option, (hm->has_pow_option?"Enabled":"Disabled"));
-  if (hm->has_pow_option)
-    s = format(s, "Try 'show ioam sc-profile' for more information\n");
+  s = format(s, "                        POT OPTION - %d (%s)\n", 
+          hm->has_pot_option, (hm->has_pot_option?"Enabled":"Disabled"));
+  if (hm->has_pot_option)
+    s = format(s, "Try 'show ioam pot and show pot profile' for more information\n");
 
   s = format(s, "         EDGE TO EDGE - PPC OPTION - %d (%s)\n", 
          hm->has_ppc_option, ppc_state[hm->has_ppc_option]);
@@ -1267,7 +1020,7 @@ int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
                               int is_add, int is_pop, int is_none)
 {
   ip6_main_t * im = &ip6_main;
-  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
   ip_lookup_main_t * lm = &im->lookup_main;
   ip_adjacency_t * adj;
   u32 fib_index;
@@ -1326,10 +1079,10 @@ int ip6_ioam_set_destination (ip6_address_t *addr, u32 mask_width, u32 vrf_id,
     adj->saved_lookup_next_index = adj->lookup_next_index;
 
   if (is_add)
-    adj->lookup_next_index = IP_LOOKUP_NEXT_ADD_HOP_BY_HOP;
+    adj->lookup_next_index = IP6_LOOKUP_NEXT_ADD_HOP_BY_HOP;
 
   if (is_pop)
-    adj->lookup_next_index = IP_LOOKUP_NEXT_POP_HOP_BY_HOP;
+    adj->lookup_next_index = IP6_LOOKUP_NEXT_POP_HOP_BY_HOP;
 
   hm->adj = *addr;
   hm->ioam_flag = (is_add ? IOAM_HBYH_ADD :
@@ -1392,11 +1145,10 @@ VLIB_CLI_COMMAND (ip6_set_ioam_destination_cmd, static) = {
   .function = ip6_set_ioam_destination_command_fn,
 };
 
+
 void vnet_register_ioam_end_of_path_callback (void *cb)
 {
-  ip6_hop_by_hop_main_t * hm = &ip6_hop_by_hop_main;
+  ip6_hop_by_hop_ioam_main_t * hm = &ip6_hop_by_hop_ioam_main;
 
   hm->ioam_end_of_path_cb = cb;
 }
-                                             
-