gre: Tunnel encap/decap flags
[vpp.git] / src / vnet / gre / interface.c
index d574e59..553c89a 100644 (file)
 #include <vnet/adj/adj_midchain.h>
 #include <vnet/adj/adj_nbr.h>
 #include <vnet/mpls/mpls.h>
+#include <vnet/l2/l2_input.h>
+#include <vnet/nhrp/nhrp.h>
 
-static const char *gre_tunnel_type_names[] = GRE_TUNNEL_TYPE_NAMES;
-
-static inline u64
-gre4_mk_key (const ip4_address_t *src,
-            const ip4_address_t *dst,
-            u32 out_fib_index)
-{
-  // FIXME. the fib index should be part of the key
-  return ((u64)src->as_u32 << 32 | (u64)dst->as_u32);
-}
-
-static u8 *
+u8 *
 format_gre_tunnel_type (u8 * s, va_list * args)
 {
-  gre_tunnel_type_t type = va_arg (*args, gre_tunnel_type_t);
+  gre_tunnel_type_t type = va_arg (*args, int);
+
+  switch (type)
+    {
+#define _(n, v) case GRE_TUNNEL_TYPE_##n:       \
+      s = format (s, "%s", v);                  \
+      break;
+      foreach_gre_tunnel_type
+#undef _
+    }
 
-  return (format(s, "%s", gre_tunnel_type_names[type]));
+  return (s);
 }
 
 static u8 *
 format_gre_tunnel (u8 * s, va_list * args)
 {
-  gre_tunnel_t * t = va_arg (*args, gre_tunnel_t *);
-  gre_main_t * gm = &gre_main;
-  u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+  gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
 
-  if (!is_ipv6)
-      s = format (s,
-                  "[%d] %U (src) %U (dst) payload %U outer_fib_index %d",
-                  t - gm->tunnels,
-                  format_ip4_address, &t->tunnel_src.ip4,
-                  format_ip4_address, &t->tunnel_dst.fp_addr.ip4,
-                  format_gre_tunnel_type, t->type,
-                  t->outer_fib_index);
-  else
-      s = format (s,
-                  "[%d] %U (src) %U (dst) payload %U outer_fib_index %d",
-                  t - gm->tunnels,
-                  format_ip6_address, &t->tunnel_src.ip6,
-                  format_ip6_address, &t->tunnel_dst.fp_addr.ip6,
-                  format_gre_tunnel_type, t->type,
-                  t->outer_fib_index);
+  s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
+             t->dev_instance, t->user_instance,
+             format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY,
+             format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
+             t->outer_fib_index, t->sw_if_index);
+
+  s = format (s, "payload %U ", format_gre_tunnel_type, t->type);
+  s = format (s, "%U ", format_tunnel_mode, t->mode);
+
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    s = format (s, "session %d ", t->session_id);
+
+  if (t->type != GRE_TUNNEL_TYPE_L3)
+    s = format (s, "l2-adj-idx %d ", t->l2_adj_index);
 
   return s;
 }
 
 static gre_tunnel_t *
-gre_tunnel_db_find (const ip46_address_t *src,
-                    const ip46_address_t *dst,
-                    u32 out_fib_index,
-                    u8 is_ipv6)
+gre_tunnel_db_find (const vnet_gre_tunnel_add_del_args_t * a,
+                   u32 outer_fib_index, gre_tunnel_key_t * key)
 {
-  gre_main_t * gm = &gre_main;
-  uword * p;
-  u64 key4, key6[4];
+  gre_main_t *gm = &gre_main;
+  uword *p;
 
-  if (!is_ipv6)
+  if (!a->is_ipv6)
     {
-      key4 = gre4_mk_key(&src->ip4, &dst->ip4, out_fib_index);
-      p = hash_get (gm->tunnel_by_key4, key4);
+      gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index,
+                  a->type, a->mode, a->session_id, &key->gtk_v4);
+      p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
     }
   else
     {
-      key6[0] = src->ip6.as_u64[0];
-      key6[1] = src->ip6.as_u64[1];
-      key6[2] = dst->ip6.as_u64[0];
-      key6[3] = dst->ip6.as_u64[1];
-      p = hash_get_mem (gm->tunnel_by_key6, key6);
+      gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index,
+                  a->type, a->mode, a->session_id, &key->gtk_v6);
+      p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
     }
 
   if (NULL == p)
@@ -102,62 +94,33 @@ gre_tunnel_db_find (const ip46_address_t *src,
 }
 
 static void
-gre_tunnel_db_add (const gre_tunnel_t *t)
+gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key)
 {
-  gre_main_t * gm = &gre_main;
-  u64 key4, key6[4], *key6_copy;
-  u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+  gre_main_t *gm = &gre_main;
 
-  if (!is_ipv6)
+  if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
     {
-      key4 = gre4_mk_key(&t->tunnel_src.ip4, &t->tunnel_dst.fp_addr.ip4,
-                       t->outer_fib_index);
-      hash_set (gm->tunnel_by_key4, key4, t - gm->tunnels);
+      hash_set_mem_alloc (&gm->tunnel_by_key6, &key->gtk_v6, t->dev_instance);
     }
   else
     {
-      key6[0] = t->tunnel_src.ip6.as_u64[0];
-      key6[1] = t->tunnel_src.ip6.as_u64[1];
-      key6[2] = t->tunnel_dst.fp_addr.ip6.as_u64[0];
-      key6[3] = t->tunnel_dst.fp_addr.ip6.as_u64[1];
-      key6_copy = clib_mem_alloc (sizeof (key6));
-      clib_memcpy (key6_copy, key6, sizeof (key6));
-      hash_set_mem (gm->tunnel_by_key6, key6_copy, t - gm->tunnels);
+      hash_set_mem_alloc (&gm->tunnel_by_key4, &key->gtk_v4, t->dev_instance);
     }
 }
 
 static void
-gre_tunnel_db_remove (const gre_tunnel_t *t)
+gre_tunnel_db_remove (gre_tunnel_t * t, gre_tunnel_key_t * key)
 {
-  gre_main_t * gm = &gre_main;
-  u64 key4, key6[4];
-  u8 is_ipv6 = t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6 ? 1 : 0;
+  gre_main_t *gm = &gre_main;
 
-  if (!is_ipv6)
+  if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
     {
-      key4 = gre4_mk_key(&t->tunnel_src.ip4, &t->tunnel_dst.fp_addr.ip4,
-                         t->outer_fib_index);
-      hash_unset (gm->tunnel_by_key4, key4);
+      hash_unset_mem_free (&gm->tunnel_by_key6, &key->gtk_v6);
     }
   else
     {
-      key6[0] = t->tunnel_src.ip6.as_u64[0];
-      key6[1] = t->tunnel_src.ip6.as_u64[1];
-      key6[2] = t->tunnel_dst.fp_addr.ip6.as_u64[0];
-      key6[3] = t->tunnel_dst.fp_addr.ip6.as_u64[1];
-      hash_unset_mem (gm->tunnel_by_key6, key6);
+      hash_unset_mem_free (&gm->tunnel_by_key4, &key->gtk_v4);
     }
-
-}
-
-static gre_tunnel_t *
-gre_tunnel_from_fib_node (fib_node_t *node)
-{
-#if (CLIB_DEBUG > 0)
-    ASSERT(FIB_NODE_TYPE_GRE_TUNNEL == node->fn_type);
-#endif
-    return ((gre_tunnel_t*) (((char*)node) -
-                             STRUCT_OFFSET_OF(gre_tunnel_t, node)));
 }
 
 /**
@@ -168,37 +131,29 @@ gre_tunnel_from_fib_node (fib_node_t *node)
 void
 gre_tunnel_stack (adj_index_t ai)
 {
-    gre_main_t * gm = &gre_main;
-    ip_adjacency_t *adj;
-    gre_tunnel_t *gt;
-    u32 sw_if_index;
-
-    adj = adj_get(ai);
-    sw_if_index = adj->rewrite_header.sw_if_index;
-
-    if ((vec_len(gm->tunnel_index_by_sw_if_index) < sw_if_index) ||
-       (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
-       return;
-
-    gt = pool_elt_at_index(gm->tunnels,
-                          gm->tunnel_index_by_sw_if_index[sw_if_index]);
-
-    /*
-     * find the adjacency that is contributed by the FIB entry
-     * that this tunnel resovles via, and use it as the next adj
-     * in the midchain
-     */
-    if (vnet_hw_interface_get_flags(vnet_get_main(),
-                                   gt->hw_if_index) &
-       VNET_HW_INTERFACE_FLAG_LINK_UP)
+  gre_main_t *gm = &gre_main;
+  ip_adjacency_t *adj;
+  gre_tunnel_t *gt;
+  u32 sw_if_index;
+
+  adj = adj_get (ai);
+  sw_if_index = adj->rewrite_header.sw_if_index;
+
+  if ((vec_len (gm->tunnel_index_by_sw_if_index) <= sw_if_index) ||
+      (~0 == gm->tunnel_index_by_sw_if_index[sw_if_index]))
+    return;
+
+  gt = pool_elt_at_index (gm->tunnels,
+                         gm->tunnel_index_by_sw_if_index[sw_if_index]);
+
+  if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
+       VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
     {
-       adj_nbr_midchain_stack(
-           ai,
-           fib_entry_contribute_ip_forwarding(gt->fib_entry_index));
+      adj_midchain_delegate_unstack (ai);
     }
-    else
+  else
     {
-       adj_nbr_midchain_unstack(ai);
+      adj_midchain_delegate_stack (ai, gt->outer_fib_index, &gt->tunnel_dst);
     }
 }
 
@@ -206,208 +161,250 @@ gre_tunnel_stack (adj_index_t ai)
  * @brief Call back when restacking all adjacencies on a GRE interface
  */
 static adj_walk_rc_t
-gre_adj_walk_cb (adj_index_t ai,
-                void *ctx)
+gre_adj_walk_cb (adj_index_t ai, void *ctx)
 {
-    gre_tunnel_stack(ai);
+  gre_tunnel_stack (ai);
 
-    return (ADJ_WALK_RC_CONTINUE);
+  return (ADJ_WALK_RC_CONTINUE);
 }
 
 static void
-gre_tunnel_restack (gre_tunnel_t *gt)
+gre_tunnel_restack (gre_tunnel_t * gt)
 {
-    fib_protocol_t proto;
+  fib_protocol_t proto;
 
-    /*
-     * walk all the adjacencies on th GRE interface and restack them
-     */
-    FOR_EACH_FIB_IP_PROTOCOL(proto)
-    {
-       adj_nbr_walk(gt->sw_if_index,
-                    proto,
-                    gre_adj_walk_cb,
-                    NULL);
-    }
+  /*
+   * walk all the adjacencies on th GRE interface and restack them
+   */
+  FOR_EACH_FIB_IP_PROTOCOL (proto)
+  {
+    adj_nbr_walk (gt->sw_if_index, proto, gre_adj_walk_cb, NULL);
+  }
 }
 
-/**
- * Function definition to backwalk a FIB node
- */
-static fib_node_back_walk_rc_t
-gre_tunnel_back_walk (fib_node_t *node,
-                     fib_node_back_walk_ctx_t *ctx)
+static void
+gre_nhrp_mk_key (const gre_tunnel_t * t,
+                const nhrp_entry_t * ne, gre_tunnel_key_t * key)
 {
-    gre_tunnel_restack(gre_tunnel_from_fib_node(node));
+  const fib_prefix_t *nh;
+
+  nh = nhrp_entry_get_nh (ne);
 
-    return (FIB_NODE_BACK_WALK_CONTINUE);
+  /* construct the key using mode P2P so it can be found in the DP */
+  if (FIB_PROTOCOL_IP4 == nh->fp_proto)
+    gre_mk_key4 (t->tunnel_src.ip4,
+                nh->fp_addr.ip4,
+                nhrp_entry_get_fib_index (ne),
+                t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v4);
+  else
+    gre_mk_key6 (&t->tunnel_src.ip6,
+                &nh->fp_addr.ip6,
+                nhrp_entry_get_fib_index (ne),
+                t->type, TUNNEL_MODE_P2P, 0, &key->gtk_v6);
 }
 
 /**
- * Function definition to get a FIB node from its index
+ * An NHRP entry has been added
  */
-static fib_node_t*
-gre_tunnel_fib_node_get (fib_node_index_t index)
+static void
+gre_nhrp_entry_added (const nhrp_entry_t * ne)
+{
+  gre_main_t *gm = &gre_main;
+  const ip46_address_t *nh;
+  gre_tunnel_key_t key;
+  gre_tunnel_t *t;
+  u32 sw_if_index;
+  u32 t_idx;
+
+  sw_if_index = nhrp_entry_get_sw_if_index (ne);
+  if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index)
+    return;
+
+  t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
+
+  if (INDEX_INVALID == t_idx)
+    return;
+
+  /* entry has been added on an interface for which there is a GRE tunnel */
+  t = pool_elt_at_index (gm->tunnels, t_idx);
+
+  if (t->mode != TUNNEL_MODE_MP)
+    return;
+
+  /* the next-hop (underlay) of the NHRP entry will form part of the key for
+   * ingress lookup to match packets to this interface */
+  gre_nhrp_mk_key (t, ne, &key);
+  gre_tunnel_db_add (t, &key);
+
+  /* update the rewrites for each of the adjacencies for this peer (overlay)
+   * using  the next-hop (underlay) */
+  mgre_walk_ctx_t ctx = {
+    .t = t,
+    .ne = ne
+  };
+  nh = nhrp_entry_get_peer (ne);
+  adj_nbr_walk_nh (nhrp_entry_get_sw_if_index (ne),
+                  (ip46_address_is_ip4 (nh) ?
+                   FIB_PROTOCOL_IP4 :
+                   FIB_PROTOCOL_IP6), nh, mgre_mk_complete_walk, &ctx);
+}
+
+static void
+gre_nhrp_entry_deleted (const nhrp_entry_t * ne)
 {
-    gre_tunnel_t * gt;
-    gre_main_t * gm;
+  gre_main_t *gm = &gre_main;
+  const ip46_address_t *nh;
+  gre_tunnel_key_t key;
+  gre_tunnel_t *t;
+  u32 sw_if_index;
+  u32 t_idx;
+
+  sw_if_index = nhrp_entry_get_sw_if_index (ne);
+  if (vec_len (gm->tunnel_index_by_sw_if_index) < sw_if_index)
+    return;
+
+  t_idx = gm->tunnel_index_by_sw_if_index[sw_if_index];
 
-    gm  = &gre_main;
-    gt = pool_elt_at_index(gm->tunnels, index);
+  if (INDEX_INVALID == t_idx)
+    return;
 
-    return (&gt->node);
+  t = pool_elt_at_index (gm->tunnels, t_idx);
+
+  /* remove the next-hop as an ingress lookup key */
+  gre_nhrp_mk_key (t, ne, &key);
+  gre_tunnel_db_remove (t, &key);
+
+  nh = nhrp_entry_get_peer (ne);
+
+  /* make all the adjacencies incomplete */
+  adj_nbr_walk_nh (nhrp_entry_get_sw_if_index (ne),
+                  (ip46_address_is_ip4 (nh) ?
+                   FIB_PROTOCOL_IP4 :
+                   FIB_PROTOCOL_IP6), nh, mgre_mk_incomplete_walk, t);
 }
 
-/**
- * Function definition to inform the FIB node that its last lock has gone.
- */
-static void
-gre_tunnel_last_lock_gone (fib_node_t *node)
+static walk_rc_t
+gre_tunnel_delete_nhrp_walk (index_t nei, void *ctx)
 {
-    /*
-     * The MPLS GRE tunnel is a root of the graph. As such
-     * it never has children and thus is never locked.
-     */
-    ASSERT(0);
+  gre_tunnel_t *t = ctx;
+  gre_tunnel_key_t key;
+
+  gre_nhrp_mk_key (t, nhrp_entry_get (nei), &key);
+  gre_tunnel_db_remove (t, &key);
+
+  return (WALK_CONTINUE);
 }
 
-/*
- * Virtual function table registered by MPLS GRE tunnels
- * for participation in the FIB object graph.
- */
-const static fib_node_vft_t gre_vft = {
-    .fnv_get = gre_tunnel_fib_node_get,
-    .fnv_last_lock = gre_tunnel_last_lock_gone,
-    .fnv_back_walk = gre_tunnel_back_walk,
-};
+static walk_rc_t
+gre_tunnel_add_nhrp_walk (index_t nei, void *ctx)
+{
+  gre_tunnel_t *t = ctx;
+  gre_tunnel_key_t key;
+
+  gre_nhrp_mk_key (t, nhrp_entry_get (nei), &key);
+  gre_tunnel_db_add (t, &key);
+
+  return (WALK_CONTINUE);
+}
 
 static int
-vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
-                     u32 * sw_if_indexp)
+vnet_gre_tunnel_add (vnet_gre_tunnel_add_del_args_t * a,
+                    u32 outer_fib_index, u32 * sw_if_indexp)
 {
-  gre_main_t * gm = &gre_main;
-  vnet_main_t * vnm = gm->vnet_main;
-  ip4_main_t * im4 = &ip4_main;
-  ip6_main_t * im6 = &ip6_main;
-  gre_tunnel_t * t;
-  vnet_hw_interface_t * hi;
+  gre_main_t *gm = &gre_main;
+  vnet_main_t *vnm = gm->vnet_main;
+  ip4_main_t *im4 = &ip4_main;
+  ip6_main_t *im6 = &ip6_main;
+  gre_tunnel_t *t;
+  vnet_hw_interface_t *hi;
   u32 hw_if_index, sw_if_index;
-  u32 outer_fib_index;
-  u8 address[6];
   clib_error_t *error;
   u8 is_ipv6 = a->is_ipv6;
+  gre_tunnel_key_t key;
 
-  if (!is_ipv6)
-    outer_fib_index = ip4_fib_index_from_table_id(a->outer_fib_id);
-  else
-    outer_fib_index = ip6_fib_index_from_table_id(a->outer_fib_id);
-
-  if (~0 == outer_fib_index)
-    return VNET_API_ERROR_NO_SUCH_FIB;
-
-  t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id, a->is_ipv6);
-
+  t = gre_tunnel_db_find (a, outer_fib_index, &key);
   if (NULL != t)
-    return VNET_API_ERROR_INVALID_VALUE;
+    return VNET_API_ERROR_IF_ALREADY_EXISTS;
 
   pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
-  memset (t, 0, sizeof (*t));
-  fib_node_init(&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
+  clib_memset (t, 0, sizeof (*t));
+
+  /* Reconcile the real dev_instance and a possible requested instance */
+  u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
+  u32 u_idx = a->instance;     /* user specified instance */
+  if (u_idx == ~0)
+    u_idx = t_idx;
+  if (hash_get (gm->instance_used, u_idx))
+    {
+      pool_put (gm->tunnels, t);
+      return VNET_API_ERROR_INSTANCE_IN_USE;
+    }
+  hash_set (gm->instance_used, u_idx, 1);
+
+  t->dev_instance = t_idx;     /* actual */
+  t->user_instance = u_idx;    /* name */
 
-  if (a->teb)
-      t->type = GRE_TUNNEL_TYPE_TEB;
+  t->type = a->type;
+  t->mode = a->mode;
+  t->flags = a->flags;
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    t->session_id = a->session_id;
+
+  if (t->type == GRE_TUNNEL_TYPE_L3)
+    {
+      if (t->mode == TUNNEL_MODE_P2P)
+       hw_if_index =
+         vnet_register_interface (vnm, gre_device_class.index, t_idx,
+                                  gre_hw_interface_class.index, t_idx);
+      else
+       hw_if_index =
+         vnet_register_interface (vnm, gre_device_class.index, t_idx,
+                                  mgre_hw_interface_class.index, t_idx);
+    }
   else
-      t->type = GRE_TUNNEL_TYPE_L3;
-
-  if (vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) > 0) {
-      vnet_interface_main_t * im = &vnm->interface_main;
-
-      hw_if_index = gm->free_gre_tunnel_hw_if_indices[t->type]
-          [vec_len (gm->free_gre_tunnel_hw_if_indices[t->type])-1];
-      _vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) -= 1;
-
-      hi = vnet_get_hw_interface (vnm, hw_if_index);
-      hi->dev_instance = t - gm->tunnels;
-      hi->hw_instance = hi->dev_instance;
-
-      /* clear old stats of freed tunnel before reuse */
-      sw_if_index = hi->sw_if_index;
-      vnet_interface_counter_lock(im);
-      vlib_zero_combined_counter
-          (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX], sw_if_index);
-      vlib_zero_combined_counter
-          (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_RX], sw_if_index);
-      vlib_zero_simple_counter
-          (&im->sw_if_counters[VNET_INTERFACE_COUNTER_DROP], sw_if_index);
-        vnet_interface_counter_unlock(im);
-      if (GRE_TUNNEL_TYPE_TEB == t->type)
-      {
-          t->l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
-                                                  hi->tx_node_index,
-                                                  "adj-l2-midchain");
-      }
-    } else {
-      if (GRE_TUNNEL_TYPE_TEB == t->type)
-      {
-        /* Default MAC address (d00b:eed0:0000 + sw_if_index) */
-        memset (address, 0, sizeof (address));
-        address[0] = 0xd0;
-        address[1] = 0x0b;
-        address[2] = 0xee;
-        address[3] = 0xd0;
-        address[4] = t - gm->tunnels;
-
-        error = ethernet_register_interface(vnm,
-                                           gre_device_teb_class.index,
-                                           t - gm->tunnels, address,
-                                           &hw_if_index,
-                                           0);
-
-        if (error)
-        {
-          clib_error_report (error);
-          return VNET_API_ERROR_INVALID_REGISTRATION;
-        }
-       hi = vnet_get_hw_interface (vnm, hw_if_index);
-
-       t->l2_tx_arc = vlib_node_add_named_next(vlib_get_main(),
-                                               hi->tx_node_index,
-                                               "adj-l2-midchain");
-      } else {
-       hw_if_index = vnet_register_interface(vnm,
-                                             gre_device_class.index,
-                                             t - gm->tunnels,
-                                             gre_hw_interface_class.index,
-                                             t - gm->tunnels);
-      }
-      hi = vnet_get_hw_interface (vnm, hw_if_index);
-      sw_if_index = hi->sw_if_index;
+    {
+      /* Default MAC address (d00b:eed0:0000 + sw_if_index) */
+      u8 address[6] =
+       { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
+      error =
+       ethernet_register_interface (vnm, gre_device_class.index, t_idx,
+                                    address, &hw_if_index, 0);
+      if (error)
+       {
+         clib_error_report (error);
+         return VNET_API_ERROR_INVALID_REGISTRATION;
+       }
     }
 
+  /* Set GRE tunnel interface output node (not used for L3 payload) */
+  vnet_set_interface_output_node (vnm, hw_if_index, gre_encap_node.index);
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  sw_if_index = hi->sw_if_index;
+
   t->hw_if_index = hw_if_index;
   t->outer_fib_index = outer_fib_index;
   t->sw_if_index = sw_if_index;
   t->l2_adj_index = ADJ_INDEX_INVALID;
 
   vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
-  gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels;
+  gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
 
   if (!is_ipv6)
     {
       vec_validate (im4->fib_index_by_sw_if_index, sw_if_index);
-      hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
+      hi->min_packet_bytes =
+       64 + sizeof (gre_header_t) + sizeof (ip4_header_t);
     }
   else
     {
       vec_validate (im6->fib_index_by_sw_if_index, sw_if_index);
-      hi->min_packet_bytes = 64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
+      hi->min_packet_bytes =
+       64 + sizeof (gre_header_t) + sizeof (ip6_header_t);
     }
 
-  hi->per_packet_overhead_bytes =
-      /* preamble */ 8 + /* inter frame gap */ 12;
-
   /* Standard default gre MTU. */
-  hi->max_l3_packet_bytes[VLIB_RX] = hi->max_l3_packet_bytes[VLIB_TX] = 9000;
+  vnet_sw_interface_set_mtu (vnm, sw_if_index, 9000);
 
   /*
    * source the FIB entry for the tunnel's destination
@@ -421,67 +418,97 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t *a,
   t->tunnel_dst.fp_proto = !is_ipv6 ? FIB_PROTOCOL_IP4 : FIB_PROTOCOL_IP6;
   t->tunnel_dst.fp_addr = a->dst;
 
-  gre_tunnel_db_add(t);
+  gre_tunnel_db_add (t, &key);
 
-  t->fib_entry_index =
-      fib_table_entry_special_add(outer_fib_index,
-                                  &t->tunnel_dst,
-                                  FIB_SOURCE_RR,
-                                  FIB_ENTRY_FLAG_NONE);
-  t->sibling_index =
-      fib_entry_child_add(t->fib_entry_index,
-                          FIB_NODE_TYPE_GRE_TUNNEL,
-                          t - gm->tunnels);
+  if (t->mode == TUNNEL_MODE_MP)
+    nhrp_walk_itf (t->sw_if_index, gre_tunnel_add_nhrp_walk, t);
 
-  if (GRE_TUNNEL_TYPE_TEB == t->type)
-  {
-      t->l2_adj_index = adj_nbr_add_or_lock(t->tunnel_dst.fp_proto,
-                                           VNET_LINK_ETHERNET,
-                                           &zero_addr,
-                                           sw_if_index);
-      gre_update_adj(vnm, t->sw_if_index, t->l2_adj_index);
-  }
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    {
+      gre_sn_key_t skey;
+      gre_sn_t *gre_sn;
+
+      gre_mk_sn_key (t, &skey);
+      gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey);
+      if (gre_sn != NULL)
+       {
+         gre_sn->ref_count++;
+         t->gre_sn = gre_sn;
+       }
+      else
+       {
+         gre_sn = clib_mem_alloc (sizeof (gre_sn_t));
+         gre_sn->seq_num = 0;
+         gre_sn->ref_count = 1;
+         t->gre_sn = gre_sn;
+         hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn);
+       }
+    }
+
+  if (t->type != GRE_TUNNEL_TYPE_L3)
+    {
+      t->l2_adj_index = adj_nbr_add_or_lock
+       (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
+      gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
+    }
 
   if (sw_if_indexp)
     *sw_if_indexp = sw_if_index;
 
+  /* register gre46-input nodes */
+  ip4_register_protocol (IP_PROTOCOL_GRE, gre4_input_node.index);
+  ip6_register_protocol (IP_PROTOCOL_GRE, gre6_input_node.index);
+
   return 0;
 }
 
 static int
-vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
-                        u32 * sw_if_indexp)
+vnet_gre_tunnel_delete (vnet_gre_tunnel_add_del_args_t * a,
+                       u32 outer_fib_index, u32 * sw_if_indexp)
 {
-  gre_main_t * gm = &gre_main;
-  vnet_main_t * vnm = gm->vnet_main;
-  gre_tunnel_t * t;
+  gre_main_t *gm = &gre_main;
+  vnet_main_t *vnm = gm->vnet_main;
+  gre_tunnel_t *t;
+  gre_tunnel_key_t key;
   u32 sw_if_index;
 
-  t = gre_tunnel_db_find(&a->src, &a->dst, a->outer_fib_id, a->is_ipv6);
-
+  t = gre_tunnel_db_find (a, outer_fib_index, &key);
   if (NULL == t)
     return VNET_API_ERROR_NO_SUCH_ENTRY;
 
+  if (t->mode == TUNNEL_MODE_MP)
+    nhrp_walk_itf (t->sw_if_index, gre_tunnel_delete_nhrp_walk, t);
+
   sw_if_index = t->sw_if_index;
-  vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */);
+  vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+
   /* make sure tunnel is removed from l2 bd or xconnect */
-  set_int_l2_mode(gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
-  vec_add1 (gm->free_gre_tunnel_hw_if_indices[t->type], t->hw_if_index);
+  set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0,
+                  L2_BD_PORT_TYPE_NORMAL, 0, 0);
   gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
 
-  if (GRE_TUNNEL_TYPE_TEB == t->type)
-    adj_unlock(t->l2_adj_index);
+  if (t->type == GRE_TUNNEL_TYPE_L3)
+    vnet_delete_hw_interface (vnm, t->hw_if_index);
+  else
+    ethernet_delete_interface (vnm, t->hw_if_index);
 
   if (t->l2_adj_index != ADJ_INDEX_INVALID)
-      adj_unlock(t->l2_adj_index);
+    {
+      adj_midchain_delegate_unstack (t->l2_adj_index);
+      adj_unlock (t->l2_adj_index);
+    }
 
-  fib_entry_child_remove(t->fib_entry_index,
-                         t->sibling_index);
-  fib_table_entry_delete_index(t->fib_entry_index,
-                               FIB_SOURCE_RR);
+  ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL));
+  if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1))
+    {
+      gre_sn_key_t skey;
+      gre_mk_sn_key (t, &skey);
+      hash_unset_mem_free (&gm->seq_num_by_key, &skey);
+      clib_mem_free (t->gre_sn);
+    }
 
-  gre_tunnel_db_remove(t);
-  fib_node_deinit(&t->node);
+  hash_unset (gm->instance_used, t->user_instance);
+  gre_tunnel_db_remove (t, &key);
   pool_put (gm->tunnels, t);
 
   if (sw_if_indexp)
@@ -491,153 +518,177 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t *a,
 }
 
 int
-vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t *a,
-                         u32 * sw_if_indexp)
+vnet_gre_tunnel_add_del (vnet_gre_tunnel_add_del_args_t * a,
+                        u32 * sw_if_indexp)
 {
+  u32 outer_fib_index;
+
+  if (!a->is_ipv6)
+    outer_fib_index = ip4_fib_index_from_table_id (a->outer_table_id);
+  else
+    outer_fib_index = ip6_fib_index_from_table_id (a->outer_table_id);
+
+  if (~0 == outer_fib_index)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  if (a->session_id > GTK_SESSION_ID_MAX)
+    return VNET_API_ERROR_INVALID_SESSION_ID;
+
+  if (a->mode == TUNNEL_MODE_MP && !ip46_address_is_zero (&a->dst))
+    return (VNET_API_ERROR_INVALID_DST_ADDRESS);
+
   if (a->is_add)
-    return (vnet_gre_tunnel_add(a, sw_if_indexp));
+    return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp));
   else
-    return (vnet_gre_tunnel_delete(a, sw_if_indexp));
+    return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp));
 }
 
 clib_error_t *
 gre_interface_admin_up_down (vnet_main_t * vnm, u32 hw_if_index, u32 flags)
 {
-  gre_main_t * gm = &gre_main;
-  vnet_hw_interface_t * hi;
+  gre_main_t *gm = &gre_main;
+  vnet_hw_interface_t *hi;
   gre_tunnel_t *t;
   u32 ti;
 
   hi = vnet_get_hw_interface (vnm, hw_if_index);
 
   if (NULL == gm->tunnel_index_by_sw_if_index ||
-      hi->sw_if_index >= vec_len(gm->tunnel_index_by_sw_if_index))
-      return (NULL);
+      hi->sw_if_index >= vec_len (gm->tunnel_index_by_sw_if_index))
+    return (NULL);
 
   ti = gm->tunnel_index_by_sw_if_index[hi->sw_if_index];
 
   if (~0 == ti)
-      /* not one of ours */
-      return (NULL);
+    /* not one of ours */
+    return (NULL);
 
-  t = pool_elt_at_index(gm->tunnels, ti);
+  t = pool_elt_at_index (gm->tunnels, ti);
 
   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
-    vnet_hw_interface_set_flags (vnm, hw_if_index, VNET_HW_INTERFACE_FLAG_LINK_UP);
+    vnet_hw_interface_set_flags (vnm, hw_if_index,
+                                VNET_HW_INTERFACE_FLAG_LINK_UP);
   else
-    vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */);
+    vnet_hw_interface_set_flags (vnm, hw_if_index, 0 /* down */ );
 
-  gre_tunnel_restack(t);
+  gre_tunnel_restack (t);
 
   return /* no error */ 0;
 }
 
 static clib_error_t *
 create_gre_tunnel_command_fn (vlib_main_t * vm,
-                unformat_input_t * input,
-                vlib_cli_command_t * cmd)
+                             unformat_input_t * input,
+                             vlib_cli_command_t * cmd)
 {
-  unformat_input_t _line_input, * line_input = &_line_input;
-  vnet_gre_add_del_tunnel_args_t _a, * a = &_a;
-  ip46_address_t src, dst;
-  u32 outer_fib_id = 0;
-  u8 teb = 0;
+  unformat_input_t _line_input, *line_input = &_line_input;
+  vnet_gre_tunnel_add_del_args_t _a, *a = &_a;
+  ip46_address_t src = ip46_address_initializer, dst =
+    ip46_address_initializer;
+  u32 instance = ~0;
+  u32 outer_table_id = 0;
+  gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
+  tunnel_mode_t t_mode = TUNNEL_MODE_P2P;
+  u32 session_id = 0;
   int rv;
-  u32 num_m_args = 0;
   u8 is_add = 1;
   u32 sw_if_index;
   clib_error_t *error = NULL;
-  u8 ipv4_set = 0;
-  u8 ipv6_set = 0;
 
   /* Get a line of input. */
-  if (! unformat_user (input, unformat_line_input, line_input))
+  if (!unformat_user (input, unformat_line_input, line_input))
     return 0;
 
-  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) {
-    if (unformat (line_input, "del"))
-      is_add = 0;
-    else if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4)) {
-      num_m_args++;
-      ipv4_set = 1;
-    } else if (unformat (line_input, "dst %U", unformat_ip4_address, &dst.ip4)) {
-      num_m_args++;
-      ipv4_set = 1;
-    } else if (unformat (line_input, "src %U", unformat_ip6_address, &src.ip6)) {
-      num_m_args++;
-      ipv6_set = 1;
-    } else if (unformat (line_input, "dst %U", unformat_ip6_address, &dst.ip6)) {
-      num_m_args++;
-      ipv6_set = 1;
-    } else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id))
-      ;
-    else if (unformat (line_input, "teb"))
-      teb = 1;
-    else
-      {
-        error = clib_error_return (0, "unknown input `%U'",
-                                   format_unformat_error, line_input);
-        goto done;
-      }
-  }
-
-  if (num_m_args < 2)
+  while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
     {
-      error = clib_error_return (0, "mandatory argument(s) missing");
-      goto done;
+      if (unformat (line_input, "del"))
+       is_add = 0;
+      else if (unformat (line_input, "instance %d", &instance))
+       ;
+      else if (unformat (line_input, "src %U", unformat_ip46_address, &src))
+       ;
+      else if (unformat (line_input, "dst %U", unformat_ip46_address, &dst))
+       ;
+      else if (unformat (line_input, "outer-table-id %d", &outer_table_id))
+       ;
+      else if (unformat (line_input, "multipoint"))
+       t_mode = TUNNEL_MODE_MP;
+      else if (unformat (line_input, "teb"))
+       t_type = GRE_TUNNEL_TYPE_TEB;
+      else if (unformat (line_input, "erspan %d", &session_id))
+       t_type = GRE_TUNNEL_TYPE_ERSPAN;
+      else
+       {
+         error = clib_error_return (0, "unknown input `%U'",
+                                    format_unformat_error, line_input);
+         goto done;
+       }
     }
 
-  if ((ipv4_set && memcmp (&src.ip4, &dst.ip4, sizeof(src.ip4)) == 0) ||
-      (ipv6_set && memcmp (&src.ip6, &dst.ip6, sizeof(src.ip6)) == 0))
+  if (ip46_address_is_equal (&src, &dst))
     {
       error = clib_error_return (0, "src and dst are identical");
       goto done;
     }
 
-  if (ipv4_set && ipv6_set)
-      return clib_error_return (0, "both IPv4 and IPv6 addresses specified");
-
-  if ((ipv4_set && memcmp (&dst.ip4, &zero_addr.ip4, sizeof(dst.ip4)) == 0) ||
-      (ipv6_set && memcmp (&dst.ip6, &zero_addr.ip6, sizeof(dst.ip6)) == 0))
+  if (t_mode != TUNNEL_MODE_MP && ip46_address_is_zero (&dst))
     {
-      error = clib_error_return (0, "dst address cannot be zero");
+      error = clib_error_return (0, "destination address not specified");
       goto done;
     }
 
-  memset (a, 0, sizeof (*a));
-  a->outer_fib_id = outer_fib_id;
-  a->teb = teb;
-  a->is_ipv6 = ipv6_set;
-  if (!ipv6_set)
+  if (ip46_address_is_zero (&src))
     {
-      clib_memcpy(&a->src.ip4, &src.ip4, sizeof(src.ip4));
-      clib_memcpy(&a->dst.ip4, &dst.ip4, sizeof(dst.ip4));
+      error = clib_error_return (0, "source address not specified");
+      goto done;
     }
-  else
+
+  if (ip46_address_is_ip4 (&src) != ip46_address_is_ip4 (&dst))
     {
-      clib_memcpy(&a->src.ip6, &src.ip6, sizeof(src.ip6));
-      clib_memcpy(&a->dst.ip6, &dst.ip6, sizeof(dst.ip6));
+      error =
+       clib_error_return (0, "src and dst address must be the same AF");
+      goto done;
     }
 
-  if (is_add)
-    rv = vnet_gre_tunnel_add(a, &sw_if_index);
-  else
-    rv = vnet_gre_tunnel_delete(a, &sw_if_index);
+  clib_memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
+  a->outer_table_id = outer_table_id;
+  a->type = t_type;
+  a->mode = t_mode;
+  a->session_id = session_id;
+  a->is_ipv6 = !ip46_address_is_ip4 (&src);
+  a->instance = instance;
+  clib_memcpy (&a->src, &src, sizeof (a->src));
+  clib_memcpy (&a->dst, &dst, sizeof (a->dst));
+
+  rv = vnet_gre_tunnel_add_del (a, &sw_if_index);
 
-  switch(rv)
+  switch (rv)
     {
     case 0:
-      vlib_cli_output(vm, "%U\n", format_vnet_sw_if_index_name, vnet_get_main(), sw_if_index);
+      vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
+                      vnet_get_main (), sw_if_index);
       break;
-    case VNET_API_ERROR_INVALID_VALUE:
+    case VNET_API_ERROR_IF_ALREADY_EXISTS:
       error = clib_error_return (0, "GRE tunnel already exists...");
       goto done;
     case VNET_API_ERROR_NO_SUCH_FIB:
-      error = clib_error_return (0, "outer fib ID %d doesn't exist\n",
-                                 outer_fib_id);
+      error = clib_error_return (0, "outer table ID %d doesn't exist\n",
+                                outer_table_id);
+      goto done;
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      error = clib_error_return (0, "GRE tunnel doesn't exist");
+      goto done;
+    case VNET_API_ERROR_INVALID_SESSION_ID:
+      error = clib_error_return (0, "session ID %d out of range\n",
+                                session_id);
+      goto done;
+    case VNET_API_ERROR_INSTANCE_IN_USE:
+      error = clib_error_return (0, "Instance is in use");
       goto done;
     default:
-      error = clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv);
+      error =
+       clib_error_return (0, "vnet_gre_tunnel_add_del returned %d", rv);
       goto done;
     }
 
@@ -647,20 +698,22 @@ done:
   return error;
 }
 
+/* *INDENT-OFF* */
 VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
   .path = "create gre tunnel",
-  .short_help = "create gre tunnel src <addr> dst <addr> "
-                "[outer-fib-id <fib>] [teb] [del]",
+  .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
+                "[outer-fib-id <fib>] [teb | erspan <session-id>] [del]",
   .function = create_gre_tunnel_command_fn,
 };
+/* *INDENT-ON* */
 
 static clib_error_t *
 show_gre_tunnel_command_fn (vlib_main_t * vm,
-                            unformat_input_t * input,
-                            vlib_cli_command_t * cmd)
+                           unformat_input_t * input,
+                           vlib_cli_command_t * cmd)
 {
-  gre_main_t * gm = &gre_main;
-  gre_tunnel_t * t;
+  gre_main_t *gm = &gre_main;
+  gre_tunnel_t *t;
   u32 ti = ~0;
 
   if (pool_elts (gm->tunnels) == 0)
@@ -669,38 +722,57 @@ show_gre_tunnel_command_fn (vlib_main_t * vm,
   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
     {
       if (unformat (input, "%d", &ti))
-        ;
+       ;
       else
-        break;
+       break;
     }
 
   if (~0 == ti)
     {
+      /* *INDENT-OFF* */
       pool_foreach (t, gm->tunnels,
       ({
           vlib_cli_output (vm, "%U", format_gre_tunnel, t);
       }));
+      /* *INDENT-ON* */
     }
   else
-  {
-      t = pool_elt_at_index(gm->tunnels, ti);
+    {
+      t = pool_elt_at_index (gm->tunnels, ti);
 
       vlib_cli_output (vm, "%U", format_gre_tunnel, t);
-  }
+    }
 
   return 0;
 }
 
+/* *INDENT-OFF* */
 VLIB_CLI_COMMAND (show_gre_tunnel_command, static) = {
     .path = "show gre tunnel",
     .function = show_gre_tunnel_command_fn,
 };
+/* *INDENT-ON* */
+
+const static nhrp_vft_t gre_nhrp_vft = {
+  .nv_added = gre_nhrp_entry_added,
+  .nv_deleted = gre_nhrp_entry_deleted,
+};
 
 /* force inclusion from application's main.c */
-clib_error_t *gre_interface_init (vlib_main_t *vm)
+clib_error_t *
+gre_interface_init (vlib_main_t * vm)
 {
-  fib_node_register_type(FIB_NODE_TYPE_GRE_TUNNEL, &gre_vft);
+  nhrp_register (&gre_nhrp_vft);
 
-  return 0;
+  return (NULL);
 }
-VLIB_INIT_FUNCTION(gre_interface_init);
+
+VLIB_INIT_FUNCTION (gre_interface_init);
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */