Optimize GRE Tunnel and add support for ERSPAN encap
[vpp.git] / src / vnet / gre / interface.c
index ce9685d..97c4f16 100644 (file)
@@ -31,35 +31,41 @@ static u8 *
 format_gre_tunnel (u8 * s, va_list * args)
 {
   gre_tunnel_t *t = va_arg (*args, gre_tunnel_t *);
-  gre_main_t *gm = &gre_main;
 
-  s = format (s, "[%d] src %U dst %U fib-idx %d sw-if-idx %d ",
-             t - gm->tunnels,
+  s = format (s, "[%d] instance %d src %U dst %U fib-idx %d sw-if-idx %d ",
+             t->dev_instance, t->user_instance,
              format_ip46_address, &t->tunnel_src, IP46_TYPE_ANY,
              format_ip46_address, &t->tunnel_dst.fp_addr, IP46_TYPE_ANY,
              t->outer_fib_index, t->sw_if_index);
 
-  s = format (s, "payload %s", gre_tunnel_type_names[t->type]);
+  s = format (s, "payload %s ", gre_tunnel_type_names[t->type]);
+
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    s = format (s, "session %d ", t->session_id);
+
+  if (t->type != GRE_TUNNEL_TYPE_L3)
+    s = format (s, "l2-adj-idx %d ", t->l2_adj_index);
 
   return s;
 }
 
 static gre_tunnel_t *
-gre_tunnel_db_find (const ip46_address_t * src,
-                   const ip46_address_t * dst,
-                   u32 out_fib_index, u8 is_ipv6, gre_tunnel_key_t * key)
+gre_tunnel_db_find (const vnet_gre_add_del_tunnel_args_t * a,
+                   u32 outer_fib_index, gre_tunnel_key_t * key)
 {
   gre_main_t *gm = &gre_main;
   uword *p;
 
-  if (!is_ipv6)
+  if (!a->is_ipv6)
     {
-      gre_mk_key4 (&src->ip4, &dst->ip4, out_fib_index, &key->gtk_v4);
+      gre_mk_key4 (a->src.ip4, a->dst.ip4, outer_fib_index,
+                  a->tunnel_type, a->session_id, &key->gtk_v4);
       p = hash_get_mem (gm->tunnel_by_key4, &key->gtk_v4);
     }
   else
     {
-      gre_mk_key6 (&src->ip6, &dst->ip6, out_fib_index, &key->gtk_v6);
+      gre_mk_key6 (&a->src.ip6, &a->dst.ip6, outer_fib_index,
+                  a->tunnel_type, a->session_id, &key->gtk_v6);
       p = hash_get_mem (gm->tunnel_by_key6, &key->gtk_v6);
     }
 
@@ -79,11 +85,11 @@ gre_tunnel_db_add (gre_tunnel_t * t, gre_tunnel_key_t * key)
 
   if (t->tunnel_dst.fp_proto == FIB_PROTOCOL_IP6)
     {
-      hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t - gm->tunnels);
+      hash_set_mem (gm->tunnel_by_key6, &t->key->gtk_v6, t->dev_instance);
     }
   else
     {
-      hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t - gm->tunnels);
+      hash_set_mem (gm->tunnel_by_key4, &t->key->gtk_v4, t->dev_instance);
     }
 }
 
@@ -136,23 +142,48 @@ gre_tunnel_stack (adj_index_t ai)
   gt = pool_elt_at_index (gm->tunnels,
                          gm->tunnel_index_by_sw_if_index[sw_if_index]);
 
-  /*
-   * find the adjacency that is contributed by the FIB entry
-   * that this tunnel resovles via, and use it as the next adj
-   * in the midchain
-   */
-  if (vnet_hw_interface_get_flags (vnet_get_main (),
-                                  gt->hw_if_index) &
-      VNET_HW_INTERFACE_FLAG_LINK_UP)
+  if ((vnet_hw_interface_get_flags (vnet_get_main (), gt->hw_if_index) &
+       VNET_HW_INTERFACE_FLAG_LINK_UP) == 0)
     {
-      adj_nbr_midchain_stack (ai,
-                             fib_entry_contribute_ip_forwarding
-                             (gt->fib_entry_index));
+      adj_nbr_midchain_unstack (ai);
+      return;
     }
-  else
+
+  dpo_id_t tmp = DPO_INVALID;
+  fib_forward_chain_type_t fib_fwd = (FIB_PROTOCOL_IP6 == adj->ia_nh_proto) ?
+    FIB_FORW_CHAIN_TYPE_UNICAST_IP6 : FIB_FORW_CHAIN_TYPE_UNICAST_IP4;
+
+  fib_entry_contribute_forwarding (gt->fib_entry_index, fib_fwd, &tmp);
+  if (DPO_LOAD_BALANCE == tmp.dpoi_type)
     {
-      adj_nbr_midchain_unstack (ai);
+      /*
+       * post GRE rewrite we will load-balance. However, the GRE encap
+       * is always the same for this adjacency/tunnel and hence the IP/GRE
+       * src,dst hash is always the same result too. So we do that hash now and
+       * stack on the choice.
+       * If the choice is an incomplete adj then we will need a poke when
+       * it becomes complete. This happens since the adj update walk propagates
+       * as far as recursive paths.
+       */
+      const dpo_id_t *choice;
+      load_balance_t *lb;
+      int hash;
+
+      lb = load_balance_get (tmp.dpoi_index);
+
+      if (fib_fwd == FIB_FORW_CHAIN_TYPE_UNICAST_IP4)
+       hash = ip4_compute_flow_hash ((ip4_header_t *) adj_get_rewrite (ai),
+                                     lb->lb_hash_config);
+      else
+       hash = ip6_compute_flow_hash ((ip6_header_t *) adj_get_rewrite (ai),
+                                     lb->lb_hash_config);
+      choice =
+       load_balance_get_bucket_i (lb, hash & lb->lb_n_buckets_minus_1);
+      dpo_copy (&tmp, choice);
     }
+
+  adj_nbr_midchain_stack (ai, &tmp);
+  dpo_reset (&tmp);
 }
 
 /**
@@ -230,7 +261,8 @@ const static fib_node_vft_t gre_vft = {
 };
 
 static int
-vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
+vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a,
+                    u32 outer_fib_index, u32 * sw_if_indexp)
 {
   gre_main_t *gm = &gre_main;
   vnet_main_t *vnm = gm->vnet_main;
@@ -239,112 +271,67 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
   gre_tunnel_t *t;
   vnet_hw_interface_t *hi;
   u32 hw_if_index, sw_if_index;
-  u32 outer_fib_index;
-  u8 address[6];
   clib_error_t *error;
   u8 is_ipv6 = a->is_ipv6;
   gre_tunnel_key_t key;
 
-  if (!is_ipv6)
-    outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id);
-  else
-    outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id);
-
-  if (~0 == outer_fib_index)
-    return VNET_API_ERROR_NO_SUCH_FIB;
-
-  t =
-    gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key);
-
+  t = gre_tunnel_db_find (a, outer_fib_index, &key);
   if (NULL != t)
-    return VNET_API_ERROR_INVALID_VALUE;
+    return VNET_API_ERROR_IF_ALREADY_EXISTS;
 
   pool_get_aligned (gm->tunnels, t, CLIB_CACHE_LINE_BYTES);
   memset (t, 0, sizeof (*t));
-  fib_node_init (&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
 
-  if (a->teb)
-    t->type = GRE_TUNNEL_TYPE_TEB;
-  else
-    t->type = GRE_TUNNEL_TYPE_L3;
-
-  if (vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) > 0)
+  /* Reconcile the real dev_instance and a possible requested instance */
+  u32 t_idx = t - gm->tunnels; /* tunnel index (or instance) */
+  u32 u_idx = a->instance;     /* user specified instance */
+  if (u_idx == ~0)
+    u_idx = t_idx;
+  if (hash_get (gm->instance_used, u_idx))
     {
-      vnet_interface_main_t *im = &vnm->interface_main;
-
-      hw_if_index = gm->free_gre_tunnel_hw_if_indices[t->type]
-       [vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) - 1];
-      _vec_len (gm->free_gre_tunnel_hw_if_indices[t->type]) -= 1;
-
-      hi = vnet_get_hw_interface (vnm, hw_if_index);
-      hi->dev_instance = t - gm->tunnels;
-      hi->hw_instance = hi->dev_instance;
-
-      /* clear old stats of freed tunnel before reuse */
-      sw_if_index = hi->sw_if_index;
-      vnet_interface_counter_lock (im);
-      vlib_zero_combined_counter
-       (&im->combined_sw_if_counters[VNET_INTERFACE_COUNTER_TX],
-        sw_if_index);
-      vlib_zero_combined_counter (&im->combined_sw_if_counters
-                                 [VNET_INTERFACE_COUNTER_RX], sw_if_index);
-      vlib_zero_simple_counter (&im->sw_if_counters
-                               [VNET_INTERFACE_COUNTER_DROP], sw_if_index);
-      vnet_interface_counter_unlock (im);
-      if (GRE_TUNNEL_TYPE_TEB == t->type)
-       {
-         t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (),
-                                                  hi->tx_node_index,
-                                                  "adj-l2-midchain");
-       }
+      pool_put (gm->tunnels, t);
+      return VNET_API_ERROR_INSTANCE_IN_USE;
     }
+  hash_set (gm->instance_used, u_idx, 1);
+
+  t->dev_instance = t_idx;     /* actual */
+  t->user_instance = u_idx;    /* name */
+  fib_node_init (&t->node, FIB_NODE_TYPE_GRE_TUNNEL);
+
+  t->type = a->tunnel_type;
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    t->session_id = a->session_id;
+
+  if (t->type == GRE_TUNNEL_TYPE_L3)
+    hw_if_index = vnet_register_interface (vnm, gre_device_class.index, t_idx,
+                                          gre_hw_interface_class.index,
+                                          t_idx);
   else
     {
-      if (GRE_TUNNEL_TYPE_TEB == t->type)
-       {
-         /* Default MAC address (d00b:eed0:0000 + sw_if_index) */
-         memset (address, 0, sizeof (address));
-         address[0] = 0xd0;
-         address[1] = 0x0b;
-         address[2] = 0xee;
-         address[3] = 0xd0;
-         address[4] = t - gm->tunnels;
-
-         error = ethernet_register_interface (vnm,
-                                              gre_device_teb_class.index,
-                                              t - gm->tunnels, address,
-                                              &hw_if_index, 0);
-
-         if (error)
-           {
-             clib_error_report (error);
-             return VNET_API_ERROR_INVALID_REGISTRATION;
-           }
-         hi = vnet_get_hw_interface (vnm, hw_if_index);
-
-         t->l2_tx_arc = vlib_node_add_named_next (vlib_get_main (),
-                                                  hi->tx_node_index,
-                                                  "adj-l2-midchain");
-       }
-      else
+      /* Default MAC d0:0b:ee:d0:HH:LL, HH/LL = high/low byte of tunnel index */
+      u8 address[6] = { 0xd0, 0x0b, 0xee, 0xd0, (u8) (t_idx >> 8), (u8) t_idx };
+      error = ethernet_register_interface (vnm, gre_device_class.index, t_idx,
+                                          address, &hw_if_index, 0);
+      if (error)
        {
-         hw_if_index = vnet_register_interface (vnm,
-                                                gre_device_class.index,
-                                                t - gm->tunnels,
-                                                gre_hw_interface_class.index,
-                                                t - gm->tunnels);
+         clib_error_report (error);
+         return VNET_API_ERROR_INVALID_REGISTRATION;
        }
-      hi = vnet_get_hw_interface (vnm, hw_if_index);
-      sw_if_index = hi->sw_if_index;
     }
 
+  /* Set GRE tunnel interface output node (not used for L3 payload) */
+  vnet_set_interface_output_node (vnm, hw_if_index, gre_encap_node.index);
+
+  hi = vnet_get_hw_interface (vnm, hw_if_index);
+  sw_if_index = hi->sw_if_index;
+
   t->hw_if_index = hw_if_index;
   t->outer_fib_index = outer_fib_index;
   t->sw_if_index = sw_if_index;
   t->l2_adj_index = ADJ_INDEX_INVALID;
 
   vec_validate_init_empty (gm->tunnel_index_by_sw_if_index, sw_if_index, ~0);
-  gm->tunnel_index_by_sw_if_index[sw_if_index] = t - gm->tunnels;
+  gm->tunnel_index_by_sw_if_index[sw_if_index] = t_idx;
 
   if (!is_ipv6)
     {
@@ -378,20 +365,37 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
   t->tunnel_dst.fp_addr = a->dst;
 
   gre_tunnel_db_add (t, &key);
+  if (t->type == GRE_TUNNEL_TYPE_ERSPAN)
+    {
+      gre_sn_key_t skey;
+      gre_sn_t *gre_sn;
 
-  t->fib_entry_index =
-    fib_table_entry_special_add (outer_fib_index,
-                                &t->tunnel_dst,
-                                FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE);
-  t->sibling_index =
-    fib_entry_child_add (t->fib_entry_index,
-                        FIB_NODE_TYPE_GRE_TUNNEL, t - gm->tunnels);
+      gre_mk_sn_key (t, &skey);
+      gre_sn = (gre_sn_t *) hash_get_mem (gm->seq_num_by_key, &skey);
+      if (gre_sn != NULL)
+       {
+         gre_sn->ref_count++;
+         t->gre_sn = gre_sn;
+       }
+      else
+       {
+         gre_sn = clib_mem_alloc (sizeof (gre_sn_t));
+         gre_sn->seq_num = 0;
+         gre_sn->ref_count = 1;
+         t->gre_sn = gre_sn;
+         hash_set_mem_alloc (&gm->seq_num_by_key, &skey, (uword) gre_sn);
+       }
+    }
+
+  t->fib_entry_index = fib_table_entry_special_add
+    (outer_fib_index, &t->tunnel_dst, FIB_SOURCE_RR, FIB_ENTRY_FLAG_NONE);
+  t->sibling_index = fib_entry_child_add
+    (t->fib_entry_index, FIB_NODE_TYPE_GRE_TUNNEL, t_idx);
 
-  if (GRE_TUNNEL_TYPE_TEB == t->type)
+  if (t->type != GRE_TUNNEL_TYPE_L3)
     {
-      t->l2_adj_index = adj_nbr_add_or_lock (t->tunnel_dst.fp_proto,
-                                            VNET_LINK_ETHERNET,
-                                            &zero_addr, sw_if_index);
+      t->l2_adj_index = adj_nbr_add_or_lock
+       (t->tunnel_dst.fp_proto, VNET_LINK_ETHERNET, &zero_addr, sw_if_index);
       gre_update_adj (vnm, t->sw_if_index, t->l2_adj_index);
     }
 
@@ -403,38 +407,29 @@ vnet_gre_tunnel_add (vnet_gre_add_del_tunnel_args_t * a, u32 * sw_if_indexp)
 
 static int
 vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a,
-                       u32 * sw_if_indexp)
+                       u32 outer_fib_index, u32 * sw_if_indexp)
 {
   gre_main_t *gm = &gre_main;
   vnet_main_t *vnm = gm->vnet_main;
   gre_tunnel_t *t;
   gre_tunnel_key_t key;
   u32 sw_if_index;
-  u32 outer_fib_index;
-
-  if (!a->is_ipv6)
-    outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id);
-  else
-    outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id);
-
-  if (~0 == outer_fib_index)
-    return VNET_API_ERROR_NO_SUCH_FIB;
-
-  t =
-    gre_tunnel_db_find (&a->src, &a->dst, outer_fib_index, a->is_ipv6, &key);
 
+  t = gre_tunnel_db_find (a, outer_fib_index, &key);
   if (NULL == t)
     return VNET_API_ERROR_NO_SUCH_ENTRY;
 
   sw_if_index = t->sw_if_index;
   vnet_sw_interface_set_flags (vnm, sw_if_index, 0 /* down */ );
+
   /* make sure tunnel is removed from l2 bd or xconnect */
   set_int_l2_mode (gm->vlib_main, vnm, MODE_L3, sw_if_index, 0, 0, 0, 0);
-  vec_add1 (gm->free_gre_tunnel_hw_if_indices[t->type], t->hw_if_index);
   gm->tunnel_index_by_sw_if_index[sw_if_index] = ~0;
 
-  if (GRE_TUNNEL_TYPE_TEB == t->type)
-    adj_unlock (t->l2_adj_index);
+  if (t->type == GRE_TUNNEL_TYPE_L3)
+    vnet_delete_hw_interface (vnm, t->hw_if_index);
+  else
+    ethernet_delete_interface (vnm, t->hw_if_index);
 
   if (t->l2_adj_index != ADJ_INDEX_INVALID)
     adj_unlock (t->l2_adj_index);
@@ -442,6 +437,16 @@ vnet_gre_tunnel_delete (vnet_gre_add_del_tunnel_args_t * a,
   fib_entry_child_remove (t->fib_entry_index, t->sibling_index);
   fib_table_entry_delete_index (t->fib_entry_index, FIB_SOURCE_RR);
 
+  ASSERT ((t->type != GRE_TUNNEL_TYPE_ERSPAN) || (t->gre_sn != NULL));
+  if ((t->type == GRE_TUNNEL_TYPE_ERSPAN) && (t->gre_sn->ref_count-- == 1))
+    {
+      gre_sn_key_t skey;
+      gre_mk_sn_key (t, &skey);
+      hash_unset_mem_free (&gm->seq_num_by_key, &skey);
+      clib_mem_free (t->gre_sn);
+    }
+
+  hash_unset (gm->instance_used, t->user_instance);
   gre_tunnel_db_remove (t);
   fib_node_deinit (&t->node);
   pool_put (gm->tunnels, t);
@@ -456,10 +461,23 @@ int
 vnet_gre_add_del_tunnel (vnet_gre_add_del_tunnel_args_t * a,
                         u32 * sw_if_indexp)
 {
+  u32 outer_fib_index;
+
+  if (!a->is_ipv6)
+    outer_fib_index = ip4_fib_index_from_table_id (a->outer_fib_id);
+  else
+    outer_fib_index = ip6_fib_index_from_table_id (a->outer_fib_id);
+
+  if (~0 == outer_fib_index)
+    return VNET_API_ERROR_NO_SUCH_FIB;
+
+  if (a->session_id > GTK_SESSION_ID_MAX)
+    return VNET_API_ERROR_INVALID_SESSION_ID;
+
   if (a->is_add)
-    return (vnet_gre_tunnel_add (a, sw_if_indexp));
+    return (vnet_gre_tunnel_add (a, outer_fib_index, sw_if_indexp));
   else
-    return (vnet_gre_tunnel_delete (a, sw_if_indexp));
+    return (vnet_gre_tunnel_delete (a, outer_fib_index, sw_if_indexp));
 }
 
 clib_error_t *
@@ -503,8 +521,10 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
   unformat_input_t _line_input, *line_input = &_line_input;
   vnet_gre_add_del_tunnel_args_t _a, *a = &_a;
   ip46_address_t src, dst;
+  u32 instance = ~0;
   u32 outer_fib_id = 0;
-  u8 teb = 0;
+  gre_tunnel_type_t t_type = GRE_TUNNEL_TYPE_L3;
+  u32 session_id = 0;
   int rv;
   u32 num_m_args = 0;
   u8 is_add = 1;
@@ -521,6 +541,8 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
     {
       if (unformat (line_input, "del"))
        is_add = 0;
+      else if (unformat (line_input, "instance %d", &instance))
+       ;
       else
        if (unformat (line_input, "src %U", unformat_ip4_address, &src.ip4))
        {
@@ -548,7 +570,9 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
       else if (unformat (line_input, "outer-fib-id %d", &outer_fib_id))
        ;
       else if (unformat (line_input, "teb"))
-       teb = 1;
+       t_type = GRE_TUNNEL_TYPE_TEB;
+      else if (unformat (line_input, "erspan %d", &session_id))
+       t_type = GRE_TUNNEL_TYPE_ERSPAN;
       else
        {
          error = clib_error_return (0, "unknown input `%U'",
@@ -582,9 +606,12 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
     }
 
   memset (a, 0, sizeof (*a));
+  a->is_add = is_add;
   a->outer_fib_id = outer_fib_id;
-  a->teb = teb;
+  a->tunnel_type = t_type;
+  a->session_id = session_id;
   a->is_ipv6 = ipv6_set;
+  a->instance = instance;
   if (!ipv6_set)
     {
       clib_memcpy (&a->src.ip4, &src.ip4, sizeof (src.ip4));
@@ -596,10 +623,7 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
       clib_memcpy (&a->dst.ip6, &dst.ip6, sizeof (dst.ip6));
     }
 
-  if (is_add)
-    rv = vnet_gre_tunnel_add (a, &sw_if_index);
-  else
-    rv = vnet_gre_tunnel_delete (a, &sw_if_index);
+  rv = vnet_gre_add_del_tunnel (a, &sw_if_index);
 
   switch (rv)
     {
@@ -607,13 +631,23 @@ create_gre_tunnel_command_fn (vlib_main_t * vm,
       vlib_cli_output (vm, "%U\n", format_vnet_sw_if_index_name,
                       vnet_get_main (), sw_if_index);
       break;
-    case VNET_API_ERROR_INVALID_VALUE:
+    case VNET_API_ERROR_IF_ALREADY_EXISTS:
       error = clib_error_return (0, "GRE tunnel already exists...");
       goto done;
     case VNET_API_ERROR_NO_SUCH_FIB:
       error = clib_error_return (0, "outer fib ID %d doesn't exist\n",
                                 outer_fib_id);
       goto done;
+    case VNET_API_ERROR_NO_SUCH_ENTRY:
+      error = clib_error_return (0, "GRE tunnel doesn't exist");
+      goto done;
+    case VNET_API_ERROR_INVALID_SESSION_ID:
+      error = clib_error_return (0, "session ID %d out of range\n",
+                                session_id);
+      goto done;
+    case VNET_API_ERROR_INSTANCE_IN_USE:
+      error = clib_error_return (0, "Instance is in use");
+      goto done;
     default:
       error =
        clib_error_return (0, "vnet_gre_add_del_tunnel returned %d", rv);
@@ -629,8 +663,8 @@ done:
 /* *INDENT-OFF* */
 VLIB_CLI_COMMAND (create_gre_tunnel_command, static) = {
   .path = "create gre tunnel",
-  .short_help = "create gre tunnel src <addr> dst <addr> "
-                "[outer-fib-id <fib>] [teb] [del]",
+  .short_help = "create gre tunnel src <addr> dst <addr> [instance <n>] "
+                "[outer-fib-id <fib>] [teb | erspan <session-id>] [del]",
   .function = create_gre_tunnel_command_fn,
 };
 /* *INDENT-ON* */