Support multiple VXLAN tunnels with same mcast dst 44/4444/2
authorEyal Bari <ebari@cisco.com>
Wed, 21 Dec 2016 10:45:47 +0000 (12:45 +0200)
committerDamjan Marion <dmarion.lists@gmail.com>
Wed, 21 Dec 2016 17:50:42 +0000 (17:50 +0000)
Support multiple vni's with the same group(multicast) address.
Implemented by sharing local adj. and rewrite among tunnels.

Change-Id: I8ca036b87af65dd6a8f1aff7da1855f72b4c8f4b
Signed-off-by: Eyal Bari <ebari@cisco.com>
vnet/vnet/vxlan/vxlan.c
vnet/vnet/vxlan/vxlan.h

index 894cc3d..abf0182 100644 (file)
@@ -289,14 +289,72 @@ static int vxlan_check_decap_next(vxlan_main_t * vxm, u32 is_ip6, u32 decap_next
   return 0;
 }
 
+static uword
+vtep_addr_ref(ip46_address_t *ip)
+{
+       if (!ip46_address_is_ip4(ip))
+               return 1; /* always create */
+       uword *pvtep = hash_get (vxlan_main.vtep4, ip->ip4.as_u32);
+       if (pvtep) 
+               return ++pvtep[0];
+       hash_set (vxlan_main.vtep4, ip->ip4.as_u32, 1);
+       return 1;
+}
+
+static uword
+vtep_addr_unref(ip46_address_t *ip)
+{
+       if (!ip46_address_is_ip4(ip))
+               return 0; /* alwways destroy */
+       uword *pvtep = hash_get (vxlan_main.vtep4, ip->ip4.as_u32);
+        ASSERT(pvtep);
+       if (!(--pvtep[0]))
+               hash_unset (vxlan_main.vtep4, ip->ip4.as_u32);
+       return pvtep[0];
+}
+
+static
+mcast_remote_t *
+mcast_ep_get(ip46_address_t * ip)
+{
+        ASSERT(ip46_address_is_multicast(ip));
+       uword * ep_idx = hash_get_mem (vxlan_main.mcast_ep_by_ip, ip);
+        ASSERT(ep_idx);
+       return pool_elt_at_index(vxlan_main.mcast_eps, *ep_idx);
+}
+
+static void
+mcast_ep_add(mcast_remote_t * new_ep)
+{
+       mcast_remote_t * ep;
+
+       pool_get_aligned (vxlan_main.mcast_eps, ep, CLIB_CACHE_LINE_BYTES);
+       *ep = *new_ep;
+       hash_set_mem (vxlan_main.mcast_ep_by_ip, &ep->ip, ep - vxlan_main.mcast_eps);
+}
+
+static void
+mcast_ep_remove(mcast_remote_t * ep)
+{
+       hash_unset_mem (vxlan_main.mcast_ep_by_ip, &ep->ip);
+       pool_put (vxlan_main.mcast_eps, ep);
+}
+
+static void
+ip46_multicast_ethernet_address(u8 * ethernet_address, ip46_address_t * ip) {
+          if (ip46_address_is_ip4(ip))
+              ip4_multicast_ethernet_address(ethernet_address, &ip->ip4);
+          else
+              ip6_multicast_ethernet_address(ethernet_address, ip->ip6.as_u32[0]);
+}
+
 int vnet_vxlan_add_del_tunnel 
 (vnet_vxlan_add_del_tunnel_args_t *a, u32 * sw_if_indexp)
 {
   vxlan_main_t * vxm = &vxlan_main;
   vxlan_tunnel_t *t = 0;
   vnet_main_t * vnm = vxm->vnet_main;
-  vnet_hw_interface_t * hi;
-  uword * p, * pvtep;
+  uword * p;
   u32 hw_if_index = ~0;
   u32 sw_if_index = ~0;
   int rv;
@@ -309,10 +367,6 @@ int vnet_vxlan_add_del_tunnel
       key4.src = a->dst.ip4.as_u32; /* decap src in key is encap dst in config */
       key4.vni = clib_host_to_net_u32 (a->vni << 8);
       p = hash_get (vxm->vxlan4_tunnel_by_key, key4.as_u64);
-      if (ip4_address_is_multicast (&a->dst.ip4))
-       pvtep = hash_get (vxm->vtep4, a->dst.ip4.as_u32);
-      else
-       pvtep = hash_get (vxm->vtep4, a->src.ip4.as_u32);
     } 
   else 
     {
@@ -320,7 +374,6 @@ int vnet_vxlan_add_del_tunnel
       key6.src.as_u64[1] = a->dst.ip6.as_u64[1];
       key6.vni = clib_host_to_net_u32 (a->vni << 8);
       p = hash_get_mem (vxm->vxlan6_tunnel_by_key, &key6);
-      pvtep = NULL;  /* ip6 vxlan-bypass not yet implemented */
     }
   
   if (a->is_add)
@@ -368,21 +421,11 @@ int vnet_vxlan_add_del_tunnel
         }
 
       if (!is_ip6)
-        {
-         hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
-         if (pvtep) 
-           pvtep[0]++;
-         else
-           {
-             if (ip4_address_is_multicast (&a->dst.ip4))
-               hash_set (vxm->vtep4, a->dst.ip4.as_u32, 1);
-             else
-               hash_set (vxm->vtep4, a->src.ip4.as_u32, 1);
-           }
-        }
+       hash_set (vxm->vxlan4_tunnel_by_key, key4.as_u64, t - vxm->tunnels);
       else
         hash_set_mem (vxm->vxlan6_tunnel_by_key, t->key6, t - vxm->tunnels);
-      
+
+      vnet_hw_interface_t * hi;
       if (vec_len (vxm->free_vxlan_tunnel_hw_if_indices) > 0)
         {
          vnet_interface_main_t * im = &vnm->interface_main;
@@ -441,6 +484,7 @@ int vnet_vxlan_add_del_tunnel
            * when the forwarding for the entry updates, and the tunnel can
            * re-stack accordingly
            */
+         vtep_addr_ref(&t->src);
           t->fib_entry_index = fib_table_entry_special_add
             (t->encap_fib_index, &tun_dst_pfx, FIB_SOURCE_RR,
             FIB_ENTRY_FLAG_NONE, ADJ_INDEX_INVALID);
@@ -448,41 +492,41 @@ int vnet_vxlan_add_del_tunnel
             (t->fib_entry_index, FIB_NODE_TYPE_VXLAN_TUNNEL, t - vxm->tunnels);
           vxlan_tunnel_restack_dpo(t);
        } 
-      else if (pvtep == NULL)
+      else
         {
          /* Multicast tunnel -
           * as the same mcast group can be used for mutiple mcast tunnels
           * with different VNIs, create the output fib adjecency only if
           * it does not already exist
           */
-          fib_protocol_t fp;
-          u8 mcast_mac[6];
-          if (!is_ip6) {
-              ip4_multicast_ethernet_address(mcast_mac, &t->dst.ip4);
-              fp = FIB_PROTOCOL_IP4;
-          } else {
-              ip6_multicast_ethernet_address(mcast_mac, t->dst.ip6.as_u32[0]);
-              fp = FIB_PROTOCOL_IP6;
-          }
-          t->mcast_adj_index = adj_rewrite_add_and_lock
-              (fp, fib_proto_to_link(fp), t->mcast_sw_if_index, mcast_mac);
-
-         flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
-
-          /* Stack mcast dst mac addr rewrite on encap */
-          dpo_proto_t dproto = fib_proto_to_dpo(fp);
+          fib_protocol_t fp = (is_ip6) ? FIB_PROTOCOL_IP6 : FIB_PROTOCOL_IP4;
           dpo_id_t dpo = DPO_INVALID;
+         dpo_proto_t dproto = fib_proto_to_dpo(fp);
 
-          dpo_set (&dpo, DPO_ADJACENCY, dproto, t->mcast_adj_index);
+         if (vtep_addr_ref(&t->dst) == 1)
+           {
+               u8 mcast_mac[6];
+
+               ip46_multicast_ethernet_address(mcast_mac, &t->dst);
+               receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
+              mcast_remote_t new_ep = {
+                .ip = t->dst,
+                .mcast_adj_index = adj_rewrite_add_and_lock
+                   (fp, fib_proto_to_link(fp), a->mcast_sw_if_index, mcast_mac),
+                 /* Add VRF local mcast adj. */
+                 .fib_entry_index = fib_table_entry_special_dpo_add
+                   (t->encap_fib_index, &tun_dst_pfx,
+                    FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo)
+                  };
+              mcast_ep_add(&new_ep);
+              dpo_reset(&dpo);
+           }
+          /* Stack shared mcast dst mac addr rewrite on encap */
+          dpo_set (&dpo, DPO_ADJACENCY, dproto,
+           mcast_ep_get(&t->dst)->mcast_adj_index);
           dpo_stack_from_node (encap_index, &t->next_dpo, &dpo);
-          dpo_reset(&dpo);
-
-          /* Add local mcast adj. */
-          receive_dpo_add_or_lock(dproto, ~0, NULL, &dpo);
-          t->fib_entry_index = fib_table_entry_special_dpo_add
-            (t->encap_fib_index, &tun_dst_pfx, 
-            FIB_SOURCE_SPECIAL, FIB_ENTRY_FLAG_NONE, &dpo);
-          dpo_reset(&dpo);
+          dpo_reset (&dpo);
+         flood_class = VNET_FLOOD_CLASS_TUNNEL_MASTER;
        }
 
       /* Set vxlan tunnel output node */
@@ -505,39 +549,29 @@ int vnet_vxlan_add_del_tunnel
 
       vxm->tunnel_index_by_sw_if_index[t->sw_if_index] = ~0;
 
+      if (!is_ip6)
+        hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
+      else
+        {
+         hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
+         clib_mem_free (t->key6);
+       }
+
       if (!ip46_address_is_multicast(&t->dst))
         {
+         vtep_addr_unref(&a->src);
          fib_entry_child_remove(t->fib_entry_index, t->sibling_index);
          fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_RR);
-         fib_node_deinit(&t->node);
         }
-      else if (pvtep == NULL || pvtep[0] == 1)
+      else if (vtep_addr_unref(&t->dst) == 0)
         {
-         adj_unlock(t->mcast_adj_index);
-         fib_table_entry_delete_index(t->fib_entry_index, FIB_SOURCE_SPECIAL);
-         fib_node_deinit(&t->node);
+         mcast_remote_t* ep = mcast_ep_get(&t->dst);
+         adj_unlock(ep->mcast_adj_index);
+         fib_table_entry_delete_index(ep->fib_entry_index, FIB_SOURCE_SPECIAL);
+         mcast_ep_remove(ep);
         }
 
-      if (!is_ip6)
-        {
-          hash_unset (vxm->vxlan4_tunnel_by_key, key4.as_u64);
-         if (pvtep)
-           {
-             pvtep[0]--;
-             if (pvtep[0] == 0)
-               {
-                 if (ip4_address_is_multicast (&a->dst.ip4))
-                   hash_unset (vxm->vtep4, a->dst.ip4.as_u32);
-                 else
-                   hash_unset (vxm->vtep4, a->src.ip4.as_u32);
-               }
-           }
-       }
-      else
-        {
-         hash_unset_mem (vxm->vxlan6_tunnel_by_key, t->key6);
-         clib_mem_free (t->key6);
-       }
+      fib_node_deinit(&t->node);
       vec_free (t->rewrite);
       pool_put (vxm->tunnels, t);
     }
@@ -848,6 +882,9 @@ clib_error_t *vxlan_init (vlib_main_t *vm)
   vxm->vxlan6_tunnel_by_key = hash_create_mem(0,
         sizeof(vxlan6_tunnel_key_t),
         sizeof(uword));
+  vxm->mcast_ep_by_ip = hash_create_mem(0,
+        sizeof(ip46_address_t),
+       sizeof(uword));
 
   udp_register_dst_port (vm, UDP_DST_PORT_vxlan, 
                          vxlan4_input_node.index, /* is_ip4 */ 1);
index f475bba..7605d94 100644 (file)
@@ -137,19 +137,29 @@ typedef enum {
   VXLAN_N_ERROR,
 } vxlan_input_error_t;
 
+typedef struct {
+  ip46_address_t ip;
+  fib_node_index_t fib_entry_index;
+  adj_index_t mcast_adj_index;
+} mcast_remote_t;
+
 typedef struct {
   /* vector of encap tunnel instances */
-  vxlan_tunnel_t *tunnels;
+  vxlan_tunnel_t * tunnels;
 
   /* lookup tunnel by key */
   uword * vxlan4_tunnel_by_key; /* keyed on ipv4.dst + vni */
   uword * vxlan6_tunnel_by_key; /* keyed on ipv6.dst + vni */
 
-  /* local VTEP IPs used by vxlan-bypass node to check if received
-     VXLAN packet DIP matches any local VTEP address */
+  /* local VTEP IPs ref count used by vxlan-bypass node to check if
+     received VXLAN packet DIP matches any local VTEP address */
   uword * vtep4;  /* local ip4 VTEPs keyed on their ip4 addr */
   uword * vtep6;  /* local ip6 VTEPs keyed on their ip6 addr */
 
+  /* set of active remote mcast VTEP */
+  mcast_remote_t * mcast_eps;
+  uword * mcast_ep_by_ip; /* mcast VTEPs keyed on their ip46 addr */
+
   /* Free vlib hw_if_indices */
   u32 * free_vxlan_tunnel_hw_if_indices;