GBP: redirect contracts 07/15807/9
author Neale Ranns <nranns@cisco.com>
Wed, 7 Nov 2018 17:25:54 +0000 (09:25 -0800)
committer Damjan Marion <dmarion@me.com>
Thu, 15 Nov 2018 17:22:55 +0000 (17:22 +0000)
Change-Id: I463b153de93cfec29a9c15e8e84e41f6003d4c5f
Signed-off-by: Neale Ranns <nranns@cisco.com>
23 files changed:
extras/vom/vom/gbp_contract_cmds.cpp
src/plugins/gbp/gbp.api
src/plugins/gbp/gbp_api.c
src/plugins/gbp/gbp_bridge_domain.c
src/plugins/gbp/gbp_bridge_domain.h
src/plugins/gbp/gbp_classify.c
src/plugins/gbp/gbp_contract.c
src/plugins/gbp/gbp_contract.h
src/plugins/gbp/gbp_endpoint.c
src/plugins/gbp/gbp_endpoint.h
src/plugins/gbp/gbp_endpoint_group.c
src/plugins/gbp/gbp_endpoint_group.h
src/plugins/gbp/gbp_learn.c
src/plugins/gbp/gbp_policy.c
src/plugins/gbp/gbp_policy_dpo.c
src/plugins/gbp/gbp_policy_dpo.h
src/plugins/gbp/gbp_recirc.c
src/plugins/gbp/gbp_route_domain.c
src/plugins/gbp/gbp_route_domain.h
src/plugins/gbp/gbp_vxlan.c
src/vnet/adj/adj_nbr.c
test/test_gbp.py
test/vpp_papi_provider.py

index dea5350..f990924 100644 (file)
@@ -39,7 +39,7 @@ create_cmd::operator==(const create_cmd& other) const
 rc_t
 create_cmd::issue(connection& con)
 {
-  msg_t req(con.ctx(), std::ref(*this));
+  msg_t req(con.ctx(), 1, std::ref(*this));
 
   auto& payload = req.get_request().get_payload();
   payload.is_add = 1;
@@ -82,7 +82,7 @@ delete_cmd::operator==(const delete_cmd& other) const
 rc_t
 delete_cmd::issue(connection& con)
 {
-  msg_t req(con.ctx(), std::ref(*this));
+  msg_t req(con.ctx(), 1, std::ref(*this));
 
   auto& payload = req.get_request().get_payload();
   payload.is_add = 0;
index 1fb9073..9af8b35 100644 (file)
@@ -251,11 +251,47 @@ define gbp_subnet_details
   vl_api_gbp_subnet_t subnet;
 };
 
-typeonly define gbp_contract
+/* One next-hop of a redirect rule.
+ *  ip    - IP address of the next-hop
+ *  mac   - MAC address of the next-hop
+ *  bd_id - ID used to find (and lock) the GBP bridge-domain
+ *  rd_id - ID used to find (and lock) the GBP route-domain
+ */
+typedef gbp_next_hop
+{
+  vl_api_address_t ip;
+  vl_api_mac_address_t mac;
+  u32 bd_id;
+  u32 rd_id;
+};
+
+/* Hash mode carried in a next-hop set; decoded into gbp_hash_mode_t. */
+enum gbp_hash_mode
+{
+  GBP_API_HASH_MODE_SRC_IP,
+  GBP_API_HASH_MODE_DST_IP,
+};
+
+/* The set of next-hops attached to a rule.
+ *  hash_mode - hash mode for the set
+ *  n_nhs     - number of entries of nhs[] that are in use
+ *  nhs       - fixed-size array of at most 8 next-hops
+ */
+typedef gbp_next_hop_set
+{
+  vl_api_gbp_hash_mode_t hash_mode;
+  u8 n_nhs;
+  vl_api_gbp_next_hop_t nhs[8];
+};
+
+/* Action of a contract rule; decoded into gbp_rule_action_t. */
+enum gbp_rule_action
+{
+  GBP_API_RULE_PERMIT,
+  GBP_API_RULE_DENY,
+  GBP_API_RULE_REDIRECT,
+};
+
+/* A contract rule.
+ *  action - what the rule does
+ *  nh_set - next-hops; the API handler only decodes this for REDIRECT
+ */
+typedef gbp_rule
+{
+  vl_api_gbp_rule_action_t action;
+  vl_api_gbp_next_hop_set_t nh_set;
+};
+
+/* A contract between a source and a destination EPG.
+ *  n_rules - number of entries in rules[]
+ *  rules   - variable length array of rules
+ */
+typedef gbp_contract
 {
   u16 src_epg;
   u16 dst_epg;
   u32 acl_index;
+  u8  n_rules;
+  vl_api_gbp_rule_t rules[n_rules];
 };
 
 autoreply define gbp_contract_add_del
index 47823d5..6a00072 100644 (file)
@@ -156,16 +156,21 @@ vl_api_gbp_endpoint_add_t_handler (vl_api_gbp_endpoint_add_t * mp)
       ip_address_decode (&mp->endpoint.tun.src, &tun_src);
       ip_address_decode (&mp->endpoint.tun.dst, &tun_dst);
 
-      rv = gbp_endpoint_update (sw_if_index, ips, &mac,
-                               ntohs (mp->endpoint.epg_id),
-                               gef, &tun_src, &tun_dst, &handle);
+      rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
+                                        sw_if_index, ips, &mac,
+                                        INDEX_INVALID, INDEX_INVALID,
+                                        ntohs (mp->endpoint.epg_id),
+                                        gef, &tun_src, &tun_dst, &handle);
     }
   else
     {
-      rv = gbp_endpoint_update (sw_if_index, ips, &mac,
-                               ntohs (mp->endpoint.epg_id),
-                               gef, NULL, NULL, &handle);
+      rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
+                                        sw_if_index, ips, &mac,
+                                        INDEX_INVALID, INDEX_INVALID,
+                                        ntohs (mp->endpoint.epg_id),
+                                        gef, NULL, NULL, &handle);
     }
+  vec_free (ips);
   BAD_SW_IF_INDEX_LABEL;
 
   /* *INDENT-OFF* */
@@ -182,7 +187,7 @@ vl_api_gbp_endpoint_del_t_handler (vl_api_gbp_endpoint_del_t * mp)
   vl_api_gbp_endpoint_del_reply_t *rmp;
   int rv = 0;
 
-  gbp_endpoint_delete (ntohl (mp->handle));
+  gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, ntohl (mp->handle));
 
   REPLY_MACRO (VL_API_GBP_ENDPOINT_DEL_REPLY + GBP_MSG_BASE);
 }
@@ -210,6 +215,8 @@ static walk_rc_t
 gbp_endpoint_send_details (index_t gei, void *args)
 {
   vl_api_gbp_endpoint_details_t *mp;
+  gbp_endpoint_loc_t *gel;
+  gbp_endpoint_fwd_t *gef;
   gbp_endpoint_t *ge;
   gbp_walk_ctx_t *ctx;
   u8 n_ips, ii;
@@ -217,7 +224,7 @@ gbp_endpoint_send_details (index_t gei, void *args)
   ctx = args;
   ge = gbp_endpoint_get (gei);
 
-  n_ips = vec_len (ge->ge_ips);
+  n_ips = vec_len (ge->ge_key.gek_ips);
   mp = vl_msg_api_alloc (sizeof (*mp) + (sizeof (*mp->endpoint.ips) * n_ips));
   if (!mp)
     return 1;
@@ -226,28 +233,32 @@ gbp_endpoint_send_details (index_t gei, void *args)
   mp->_vl_msg_id = ntohs (VL_API_GBP_ENDPOINT_DETAILS + GBP_MSG_BASE);
   mp->context = ctx->context;
 
+  gel = &ge->ge_locs[0];
+  gef = &ge->ge_fwd;
+
   if (gbp_endpoint_is_remote (ge))
     {
-      mp->endpoint.sw_if_index = ntohl (ge->tun.ge_parent_sw_if_index);
-      ip_address_encode (&ge->tun.ge_src, IP46_TYPE_ANY,
+      mp->endpoint.sw_if_index = ntohl (gel->tun.gel_parent_sw_if_index);
+      ip_address_encode (&gel->tun.gel_src, IP46_TYPE_ANY,
                         &mp->endpoint.tun.src);
-      ip_address_encode (&ge->tun.ge_dst, IP46_TYPE_ANY,
+      ip_address_encode (&gel->tun.gel_dst, IP46_TYPE_ANY,
                         &mp->endpoint.tun.dst);
     }
   else
     {
-      mp->endpoint.sw_if_index = ntohl (ge->ge_sw_if_index);
+      mp->endpoint.sw_if_index = ntohl (gef->gef_itf);
     }
-  mp->endpoint.epg_id = ntohs (ge->ge_epg_id);
+  mp->endpoint.epg_id = ntohs (ge->ge_fwd.gef_epg_id);
   mp->endpoint.n_ips = n_ips;
-  mp->endpoint.flags = gbp_endpoint_flags_encode (ge->ge_flags);
+  mp->endpoint.flags = gbp_endpoint_flags_encode (gef->gef_flags);
   mp->handle = htonl (gei);
   mp->age = vlib_time_now (vlib_get_main ()) - ge->ge_last_time;
-  mac_address_encode (&ge->ge_mac, &mp->endpoint.mac);
+  mac_address_encode (&ge->ge_key.gek_mac, &mp->endpoint.mac);
 
-  vec_foreach_index (ii, ge->ge_ips)
+  vec_foreach_index (ii, ge->ge_key.gek_ips)
   {
-    ip_address_encode (&ge->ge_ips[ii], IP46_TYPE_ANY, &mp->endpoint.ips[ii]);
+    ip_address_encode (&ge->ge_key.gek_ips[ii].fp_addr,
+                      IP46_TYPE_ANY, &mp->endpoint.ips[ii]);
   }
 
   vl_api_send_msg (ctx->reg, (u8 *) mp);
@@ -672,20 +683,186 @@ vl_api_gbp_recirc_dump_t_handler (vl_api_gbp_recirc_dump_t * mp)
   gbp_recirc_walk (gbp_recirc_send_details, &ctx);
 }
 
+/**
+ * Translate a rule action received over the API into the internal
+ * gbp_rule_action_t.
+ *
+ * @param in  action as carried in the message (network byte order)
+ * @param out written with the decoded action on success only
+ * @return 0 on success, -1 if the value is not a known action
+ */
+static int
+gbp_contract_rule_action_deocde (vl_api_gbp_rule_action_t in,
+                                gbp_rule_action_t * out)
+{
+  int rv = 0;
+
+  switch (clib_net_to_host_u32 (in))
+    {
+    case GBP_API_RULE_PERMIT:
+      *out = GBP_RULE_PERMIT;
+      break;
+    case GBP_API_RULE_DENY:
+      *out = GBP_RULE_DENY;
+      break;
+    case GBP_API_RULE_REDIRECT:
+      *out = GBP_RULE_REDIRECT;
+      break;
+    default:
+      rv = -1;
+      break;
+    }
+
+  return (rv);
+}
+
+/**
+ * Translate a hash mode received over the API into the internal
+ * gbp_hash_mode_t.
+ *
+ * @param in  hash mode as carried in the message (network byte order)
+ * @param out written with the decoded mode on success only
+ * @return 0 on success, -2 if the value is not a known hash mode
+ */
+static int
+gbp_hash_mode_decode (vl_api_gbp_hash_mode_t in, gbp_hash_mode_t * out)
+{
+  int rv = 0;
+
+  switch (clib_net_to_host_u32 (in))
+    {
+    case GBP_API_HASH_MODE_SRC_IP:
+      *out = GBP_HASH_MODE_SRC_IP;
+      break;
+    case GBP_API_HASH_MODE_DST_IP:
+      *out = GBP_HASH_MODE_DST_IP;
+      break;
+    default:
+      rv = -2;
+      break;
+    }
+
+  return (rv);
+}
+
+/**
+ * Decode one API next-hop and allocate the matching gbp_next_hop_t.
+ *
+ * The bridge-domain and route-domain named in the message are looked up
+ * and locked; on success the indices are handed to the new next-hop.
+ *
+ * @param in   next-hop as received in the message
+ * @param gnhi written with the index of the allocated next-hop on success
+ * @return 0 on success,
+ *         VNET_API_ERROR_BD_NOT_MODIFIABLE if the bridge-domain is unknown,
+ *         VNET_API_ERROR_NO_SUCH_FIB if the route-domain is unknown
+ */
+static int
+gbp_next_hop_decode (const vl_api_gbp_next_hop_t * in, index_t * gnhi)
+{
+  ip46_address_t ip;
+  mac_address_t mac;
+  index_t grd, gbd;
+
+  gbd = gbp_bridge_domain_find_and_lock (ntohl (in->bd_id));
+
+  if (INDEX_INVALID == gbd)
+    return (VNET_API_ERROR_BD_NOT_MODIFIABLE);
+
+  grd = gbp_route_domain_find_and_lock (ntohl (in->rd_id));
+
+  if (INDEX_INVALID == grd)
+    {
+      /* no next-hop will own the bridge-domain lock taken above */
+      gbp_bridge_domain_unlock (gbd);
+      return (VNET_API_ERROR_NO_SUCH_FIB);
+    }
+
+  ip_address_decode (&in->ip, &ip);
+  mac_address_decode (&in->mac, &mac);
+
+  *gnhi = gbp_next_hop_alloc (&ip, grd, &mac, gbd);
+
+  return (0);
+}
+
+/**
+ * Decode the next-hop set of a redirect rule.
+ *
+ * @param in        next-hop set as received in the message
+ * @param hash_mode written with the decoded hash mode
+ * @param out       written with a vector of next-hop indices; NULL if
+ *                  decoding one of the next-hops failed
+ * @return 0 on success, otherwise the error from the failing step
+ *
+ * NOTE(review): when a next-hop fails to decode, the next-hops decoded
+ * before it keep their pool entries and domain locks; no release helper
+ * is visible from this file.
+ */
+static int
+gbp_next_hop_set_decode (const vl_api_gbp_next_hop_set_t * in,
+                        gbp_hash_mode_t * hash_mode, index_t ** out)
+{
+  index_t *gnhis = NULL;
+  int rv;
+  u8 ii;
+
+  rv = gbp_hash_mode_decode (in->hash_mode, hash_mode);
+
+  if (0 != rv)
+    return rv;
+
+  /*
+   * n_nhs comes straight from the client; the array in the message has
+   * a fixed size, so reject counts that would read beyond it.
+   */
+  if (in->n_nhs > ARRAY_LEN (in->nhs))
+    return (VNET_API_ERROR_INVALID_VALUE);
+
+  vec_validate (gnhis, in->n_nhs - 1);
+
+  for (ii = 0; ii < in->n_nhs; ii++)
+    {
+      rv = gbp_next_hop_decode (&in->nhs[ii], &gnhis[ii]);
+
+      if (0 != rv)
+       {
+         vec_free (gnhis);
+         break;
+       }
+    }
+
+  *out = gnhis;
+  return (rv);
+}
+
+/**
+ * Decode one API rule and allocate the matching gbp_rule_t.
+ *
+ * Only redirect rules carry next-hops; every other action gets no
+ * next-hops and the src-ip hash mode.
+ *
+ * @param in  rule as received in the message
+ * @param gui written with the index of the allocated rule on success
+ * @return 0 on success, otherwise the error from the failing decode step
+ */
+static int
+gbp_contract_rule_decode (const vl_api_gbp_rule_t * in, index_t * gui)
+{
+  gbp_hash_mode_t hash_mode = GBP_HASH_MODE_SRC_IP;
+  gbp_rule_action_t action;
+  index_t *nhs = NULL;
+  int rv;
+
+  rv = gbp_contract_rule_action_deocde (in->action, &action);
+  if (0 != rv)
+    return (rv);
+
+  if (GBP_RULE_REDIRECT == action)
+    {
+      rv = gbp_next_hop_set_decode (&in->nh_set, &hash_mode, &nhs);
+      if (0 != rv)
+       return (rv);
+    }
+
+  *gui = gbp_rule_alloc (action, hash_mode, nhs);
+
+  return (0);
+}
+
+/**
+ * Decode the array of rules carried by a contract message.
+ *
+ * @param n_rules number of entries in rules
+ * @param rules   rules as received in the message
+ * @param out     written with a vector of rule indices on success
+ *                (NULL when n_rules is 0); untouched on failure
+ * @return 0 on success, otherwise the error from the first rule that
+ *         failed to decode
+ */
+static int
+gbp_contract_rules_decode (u8 n_rules,
+                          const vl_api_gbp_rule_t * rules, index_t ** out)
+{
+  index_t *guis = NULL;
+  u8 ii;
+
+  if (n_rules > 0)
+    {
+      vec_validate (guis, n_rules - 1);
+
+      for (ii = 0; ii < n_rules; ii++)
+       {
+         int rv = gbp_contract_rule_decode (&rules[ii], &guis[ii]);
+
+         if (0 != rv)
+           {
+             vec_free (guis);
+             return (rv);
+           }
+       }
+    }
+
+  *out = guis;
+  return (0);
+}
+
+/**
+ * API handler for gbp_contract_add_del.
+ *
+ * On add, the rules in the message are decoded first and the contract
+ * is only updated if that succeeds; on delete the contract is removed.
+ * The result code of either path is sent back in the reply.
+ */
 static void
 vl_api_gbp_contract_add_del_t_handler (vl_api_gbp_contract_add_del_t * mp)
 {
   vl_api_gbp_contract_add_del_reply_t *rmp;
+  index_t *rules;
   int rv = 0;
 
   if (mp->is_add)
-    gbp_contract_update (ntohs (mp->contract.src_epg),
-                        ntohs (mp->contract.dst_epg),
-                        ntohl (mp->contract.acl_index));
+    {
+      rv = gbp_contract_rules_decode (mp->contract.n_rules,
+                                     mp->contract.rules, &rules);
+      if (0 != rv)
+       goto out;
+
+      rv = gbp_contract_update (ntohs (mp->contract.src_epg),
+                               ntohs (mp->contract.dst_epg),
+                               ntohl (mp->contract.acl_index), rules);
+    }
   else
-    gbp_contract_delete (ntohs (mp->contract.src_epg),
-                        ntohs (mp->contract.dst_epg));
+    rv = gbp_contract_delete (ntohs (mp->contract.src_epg),
+                             ntohs (mp->contract.dst_epg));
 
+out:
   REPLY_MACRO (VL_API_GBP_CONTRACT_ADD_DEL_REPLY + GBP_MSG_BASE);
 }
 
@@ -706,7 +883,7 @@ gbp_contract_send_details (gbp_contract_t * gbpc, void *args)
 
   mp->contract.src_epg = ntohs (gbpc->gc_key.gck_src);
   mp->contract.dst_epg = ntohs (gbpc->gc_key.gck_dst);
-  mp->contract.acl_index = ntohl (gbpc->gc_value.gc_acl_index);
+  // mp->contract.acl_index = ntohl (gbpc->gc_value.gc_acl_index);
 
   vl_api_send_msg (ctx->reg, (u8 *) mp);
 
index 1a1a7bd..298819f 100644 (file)
@@ -41,6 +41,12 @@ vlib_log_class_t gb_logger;
 #define GBP_BD_DBG(...)                           \
     vlib_log_debug (gb_logger, __VA_ARGS__);
 
+/**
+ * Convert a bridge-domain pointer into its index in the pool.
+ */
+index_t
+gbp_bridge_domain_index (const gbp_bridge_domain_t * gbd)
+{
+  index_t gbi = gbd - gbp_bridge_domain_pool;
+
+  return (gbi);
+}
+
 static void
 gbp_bridge_domain_lock (index_t i)
 {
@@ -96,6 +102,38 @@ gbp_bridge_domain_db_remove (gbp_bridge_domain_t * gb)
   gbp_bridge_domain_db.gbd_by_bd_index[gb->gb_bd_index] = INDEX_INVALID;
 }
 
+/**
+ * Format function taking a gbp_bridge_domain_t pointer.
+ * Prints "NULL" for a NULL pointer.
+ */
+static u8 *
+format_gbp_bridge_domain_ptr (u8 * s, va_list * args)
+{
+  gbp_bridge_domain_t *gb = va_arg (*args, gbp_bridge_domain_t *);
+  vnet_main_t *vnm = vnet_get_main ();
+
+  if (NULL == gb)
+    return (format (s, "NULL"));
+
+  return (format (s, "[%d] bd:[%d,%d], bvi:%U uu-flood:%U locks:%d",
+                 gb - gbp_bridge_domain_pool,
+                 gb->gb_bd_id,
+                 gb->gb_bd_index,
+                 format_vnet_sw_if_index_name, vnm,
+                 gb->gb_bvi_sw_if_index,
+                 format_vnet_sw_if_index_name, vnm,
+                 gb->gb_uu_fwd_sw_if_index, gb->gb_locks));
+}
+
+/**
+ * Format function taking a bridge-domain index; defers to the
+ * pointer based formatter.
+ */
+u8 *
+format_gbp_bridge_domain (u8 * s, va_list * args)
+{
+  index_t gbi = va_arg (*args, index_t);
+  gbp_bridge_domain_t *gb = gbp_bridge_domain_get (gbi);
+
+  return (format (s, "%U", format_gbp_bridge_domain_ptr, gb));
+}
+
 int
 gbp_bridge_domain_add_and_lock (u32 bd_id,
                                gbp_bridge_domain_flags_t flags,
@@ -158,7 +196,7 @@ gbp_bridge_domain_add_and_lock (u32 bd_id,
       gb->gb_locks++;
     }
 
-  GBP_BD_DBG ("add: %U", format_gbp_bridge_domain, gb);
+  GBP_BD_DBG ("add: %U", format_gbp_bridge_domain_ptr, gb);
 
   return (0);
 }
@@ -174,7 +212,7 @@ gbp_bridge_domain_unlock (index_t index)
 
   if (0 == gb->gb_locks)
     {
-      GBP_BD_DBG ("destroy: %U", format_gbp_bridge_domain, gb);
+      GBP_BD_DBG ("destroy: %U", format_gbp_bridge_domain_ptr, gb);
 
       l2fib_del_entry (vnet_sw_interface_get_hw_address
                       (vnet_get_main (), gb->gb_bvi_sw_if_index),
@@ -204,8 +242,7 @@ gbp_bridge_domain_delete (u32 bd_id)
 
   if (INDEX_INVALID != gbi)
     {
-      GBP_BD_DBG ("del: %U", format_gbp_bridge_domain,
-                 gbp_bridge_domain_get (gbi));
+      GBP_BD_DBG ("del: %U", format_gbp_bridge_domain, gbi);
       gbp_bridge_domain_unlock (gbi);
 
       return (0);
@@ -287,33 +324,13 @@ VLIB_CLI_COMMAND (gbp_bridge_domain_cli_node, static) = {
   .function = gbp_bridge_domain_cli,
 };
 
-u8 *
-format_gbp_bridge_domain (u8 * s, va_list * args)
-{
-  gbp_bridge_domain_t *gb = va_arg (*args, gbp_bridge_domain_t*);
-  vnet_main_t *vnm = vnet_get_main ();
-
-  if (NULL != gb)
-    s = format (s, "[%d] bd:[%d,%d], bvi:%U uu-flood:%U locks:%d",
-                gb - gbp_bridge_domain_pool,
-                gb->gb_bd_id,
-                gb->gb_bd_index,
-                format_vnet_sw_if_index_name, vnm, gb->gb_bvi_sw_if_index,
-                format_vnet_sw_if_index_name, vnm, gb->gb_uu_fwd_sw_if_index,
-                gb->gb_locks);
-  else
-    s = format (s, "NULL");
-
-  return (s);
-}
-
+/**
+ * Walk callback: print one bridge-domain on the CLI.
+ * ctx is the vlib_main_t to print to; returns 1.
+ */
 static int
 gbp_bridge_domain_show_one (gbp_bridge_domain_t *gb, void *ctx)
 {
   vlib_main_t *vm;
 
   vm = ctx;
-  vlib_cli_output (vm, "  %U",format_gbp_bridge_domain, gb);
+  vlib_cli_output (vm, "  %U", format_gbp_bridge_domain_ptr, gb);
 
   return (1);
 }
index 4135c2d..cc03320 100644 (file)
@@ -77,6 +77,7 @@ extern int gbp_bridge_domain_add_and_lock (u32 bd_id,
 extern void gbp_bridge_domain_unlock (index_t gbi);
 extern index_t gbp_bridge_domain_find_and_lock (u32 bd_id);
 extern int gbp_bridge_domain_delete (u32 bd_id);
+extern index_t gbp_bridge_domain_index (const gbp_bridge_domain_t *);
 
 typedef int (*gbp_bridge_domain_cb_t) (gbp_bridge_domain_t * gb, void *ctx);
 extern void gbp_bridge_domain_walk (gbp_bridge_domain_cb_t bgpe, void *ctx);
index fb57426..4846911 100644 (file)
@@ -154,7 +154,7 @@ gbp_classify_inline (vlib_main_t * vm,
                }
 
              if (PREDICT_TRUE (NULL != ge0))
-               src_epg = ge0->ge_epg_id;
+               src_epg = ge0->ge_fwd.gef_epg_id;
              else
                src_epg = EPG_INVALID;
            }
index 32dd944..ec54569 100644 (file)
  */
 
 #include <plugins/gbp/gbp.h>
+#include <plugins/gbp/gbp_bridge_domain.h>
+#include <plugins/gbp/gbp_route_domain.h>
+#include <plugins/gbp/gbp_policy_dpo.h>
+
+#include <vnet/dpo/load_balance.h>
+#include <vnet/dpo/drop_dpo.h>
 
 /**
  * Single contract DB instance
  */
 gbp_contract_db_t gbp_contract_db;
 
-void
-gbp_contract_update (epg_id_t src_epg, epg_id_t dst_epg, u32 acl_index)
+/* Pool of all contracts; the contract DB hash maps a key to an index here */
+gbp_contract_t *gbp_contract_pool;
+
+/* Log class registered in gbp_contract_init */
+vlib_log_class_t gc_logger;
+
+/* FIB node type of a next-hop, registered in gbp_contract_init */
+fib_node_type_t gbp_next_hop_fib_type;
+
+/* Pools of rules and of next-hops; both are referenced by index */
+gbp_rule_t *gbp_rule_pool;
+gbp_next_hop_t *gbp_next_hop_pool;
+
+/* Log contract events; note this uses the notice level */
+#define GBP_CONTRACT_DBG(...)                           \
+    vlib_log_notice (gc_logger, __VA_ARGS__);
+
+/**
+ * Allocate a rule from the rule pool.
+ *
+ * @param action    what the rule does
+ * @param hash_mode hash mode requested for the rule's next-hops
+ * @param nhs       vector of next-hop indices; stored as-is in the rule
+ * @return index of the new rule in gbp_rule_pool
+ */
+index_t
+gbp_rule_alloc (gbp_rule_action_t action,
+               gbp_hash_mode_t hash_mode, index_t * nhs)
 {
-  gbp_main_t *gm = &gbp_main;
-  u32 *acl_vec = 0;
-  gbp_contract_value_t value = {
-    .gc_lc_index = ~0,
-    .gc_acl_index = ~0,
+  gbp_rule_t *gu;
+
+  pool_get_zero (gbp_rule_pool, gu);
+
+  /* without this the zeroed element always reports the first hash mode */
+  gu->gu_hash_mode = hash_mode;
+  gu->gu_nhs = nhs;
+  gu->gu_action = action;
+
+  return (gu - gbp_rule_pool);
+}
+
+/**
+ * Allocate a next-hop from the next-hop pool.
+ *
+ * @param ip  IP address of the next-hop
+ * @param grd index of the route-domain, stored in the next-hop
+ * @param mac MAC address of the next-hop
+ * @param gbd index of the bridge-domain, stored in the next-hop
+ * @return index of the new next-hop in gbp_next_hop_pool
+ */
+index_t
+gbp_next_hop_alloc (const ip46_address_t * ip,
+                   index_t grd, const mac_address_t * mac, index_t gbd)
+{
+  gbp_next_hop_t *gnh;
+  fib_protocol_t fproto;
+
+  pool_get_zero (gbp_next_hop_pool, gnh);
+  fib_node_init (&gnh->gnh_node, gbp_next_hop_fib_type);
+
+  gnh->gnh_bd = gbd;
+  gnh->gnh_rd = grd;
+  mac_address_copy (&gnh->gnh_mac, mac);
+  ip46_address_copy (&gnh->gnh_ip, ip);
+
+  /* no adjacency is held until the next-hop is resolved */
+  FOR_EACH_FIB_IP_PROTOCOL (fproto)
+  {
+    gnh->gnh_ai[fproto] = INDEX_INVALID;
+  }
+
+  return (gnh - gbp_next_hop_pool);
+}
+
+/**
+ * Get the next-hop stored at the given index of the next-hop pool.
+ */
+static inline gbp_next_hop_t *
+gbp_next_hop_get (index_t gui)
+{
+  gbp_next_hop_t *gnh = pool_elt_at_index (gbp_next_hop_pool, gui);
+
+  return (gnh);
+}
+
+/**
+ * Release everything held by a vector of rules and free the vector.
+ *
+ * For each rule the load-balance DPOs are reset. For each of its
+ * next-hops the bridge-domain, route-domain, endpoint and adjacency
+ * locks are dropped and the next-hop is returned to its pool. The rule's
+ * next-hop vector is then freed and the rule returned to its pool.
+ *
+ * @param rules vector of rule indices; may be NULL
+ */
+static void
+gbp_contract_rules_free (index_t * rules)
+{
+  index_t *gui, *gnhi;
+
+  vec_foreach (gui, rules)
+  {
+    gbp_policy_node_t pnode;
+    fib_protocol_t fproto;
+    gbp_next_hop_t *gnh;
+    gbp_rule_t *gu;
+
+    gu = gbp_rule_get (*gui);
+
+    FOR_EACH_GBP_POLICY_NODE (pnode)
+    {
+      FOR_EACH_FIB_IP_PROTOCOL (fproto)
+      {
+       dpo_reset (&gu->gu_dpo[pnode][fproto]);
+      }
+    }
+
+    vec_foreach (gnhi, gu->gu_nhs)
+    {
+      gnh = gbp_next_hop_get (*gnhi);
+      gbp_bridge_domain_unlock (gnh->gnh_bd);
+      gbp_route_domain_unlock (gnh->gnh_rd);
+      /* stop being a child of the endpoint before giving up the lock */
+      gbp_endpoint_child_remove (gnh->gnh_ge, gnh->gnh_sibling);
+      gbp_endpoint_unlock (GBP_ENDPOINT_SRC_RR, gnh->gnh_ge);
+
+      FOR_EACH_FIB_IP_PROTOCOL (fproto)
+      {
+       adj_unlock (gnh->gnh_ai[fproto]);
+      }
+
+      /* nothing references the next-hop any more */
+      pool_put (gbp_next_hop_pool, gnh);
+    }
+
+    vec_free (gu->gu_nhs);
+    pool_put (gbp_rule_pool, gu);
+  }
+  vec_free (rules);
+}
+
+/**
+ * Format function taking a next-hop index: prints the MAC, the
+ * bridge-domain, the IP address and the index of the endpoint.
+ */
+static u8 *
+format_gbp_next_hop (u8 * s, va_list * args)
+{
+  index_t gnhi = va_arg (*args, index_t);
+  gbp_next_hop_t *gnh = gbp_next_hop_get (gnhi);
+
+  return (format (s, "%U, %U, %U EP:%d",
+                 format_mac_address_t, &gnh->gnh_mac,
+                 format_gbp_bridge_domain, gnh->gnh_bd,
+                 format_ip46_address, &gnh->gnh_ip, IP46_TYPE_ANY,
+                 gnh->gnh_ge));
+}
+
+/**
+ * Format function taking a gbp_rule_action_t: prints the name listed
+ * for it in foreach_gbp_rule_action, or "unknown".
+ */
+static u8 *
+format_gbp_rule_action (u8 * s, va_list * args)
+{
+  gbp_rule_action_t action = va_arg (*args, gbp_rule_action_t);
+
+  switch (action)
+    {
+#define _(v,a) case GBP_RULE_##v: return (format (s, "%s", a));
+      foreach_gbp_rule_action
+#undef _
+    }
+
+  return (format (s, "unknown"));
+}
+
+/**
+ * Format function taking a gbp_hash_mode_t: prints the name listed
+ * for it in foreach_gbp_hash_mode, or "unknown".
+ */
+static u8 *
+format_gbp_hash_mode (u8 * s, va_list * args)
+{
+  gbp_hash_mode_t mode = va_arg (*args, gbp_hash_mode_t);
+
+  switch (mode)
+    {
+#define _(v,a) case GBP_HASH_MODE_##v: return (format (s, "%s", a));
+      foreach_gbp_hash_mode
+#undef _
+    }
+
+  return (format (s, "unknown"));
+}
+
+/**
+ * Format function taking a gbp_policy_node_t: prints the name listed
+ * for it in foreach_gbp_policy_node, or "unknown".
+ */
+static u8 *
+format_gbp_policy_node (u8 * s, va_list * args)
+{
+  gbp_policy_node_t pnode = va_arg (*args, gbp_policy_node_t);
+
+  switch (pnode)
+    {
+#define _(v,a) case GBP_POLICY_NODE_##v: return (format (s, "%s", a));
+      foreach_gbp_policy_node
+#undef _
+    }
+
+  return (format (s, "unknown"));
+}
+
+/**
+ * Format function taking a rule index: prints the action, the hash
+ * mode for redirect rules, each next-hop, and for every policy node
+ * the DPOs that are valid.
+ */
+static u8 *
+format_gbp_rule (u8 * s, va_list * args)
+{
+  index_t gui = va_arg (*args, index_t);
+  gbp_policy_node_t pnode;
+  fib_protocol_t fproto;
+  gbp_rule_t *gu;
+  index_t *gnhi;
+
+  gu = gbp_rule_get (gui);
+  s = format (s, "%U", format_gbp_rule_action, gu->gu_action);
+
+  switch (gu->gu_action)
+    {
+    case GBP_RULE_PERMIT:
+    case GBP_RULE_DENY:
+      break;
+    case GBP_RULE_REDIRECT:
+      s = format (s, ", %U", format_gbp_hash_mode, gu->gu_hash_mode);
+      break;
+    }
+
+  vec_foreach (gnhi, gu->gu_nhs)
+  {
+    s = format (s, "\n      [%U]", format_gbp_next_hop, *gnhi);
+  }
+
+  FOR_EACH_GBP_POLICY_NODE (pnode)
+  {
+    s = format (s, "\n    policy-%U", format_gbp_policy_node, pnode);
+
+    FOR_EACH_FIB_IP_PROTOCOL (fproto)
+    {
+      if (dpo_id_is_valid (&gu->gu_dpo[pnode][fproto]))
+       {
+         s =
+           format (s, "\n      %U", format_dpo_id,
+                   &gu->gu_dpo[pnode][fproto], 8);
+       }
+    }
+  }
+
+  return (s);
+}
+
+/**
+ * (Re)build the adjacency a next-hop uses for one IP protocol.
+ *
+ * An ethernet rewrite is constructed (ethertype from fproto, source
+ * from gbp_route_domain_get_local_mac, destination the next-hop's MAC)
+ * and a neighbour adjacency is added or locked on the interface of the
+ * endpoint the next-hop resolved to.
+ */
+static void
+gbp_contract_mk_adj (gbp_next_hop_t * gnh, fib_protocol_t fproto)
+{
+  ethernet_header_t *eth;
+  gbp_endpoint_t *ge;
+  index_t old_ai;
+  u8 *rewrite;
+
+  old_ai = gnh->gnh_ai[fproto];
+  rewrite = NULL;
+  vec_validate (rewrite, sizeof (*eth) - 1);
+  eth = (ethernet_header_t *) rewrite;
+
+  GBP_CONTRACT_DBG ("...mk-adj: %U", format_gbp_next_hop,
+                   gnh - gbp_next_hop_pool);
+
+  ge = gbp_endpoint_get (gnh->gnh_ge);
+
+  eth->type = clib_host_to_net_u16 ((fproto == FIB_PROTOCOL_IP4 ?
+                                    ETHERNET_TYPE_IP4 : ETHERNET_TYPE_IP6));
+  mac_address_to_bytes (gbp_route_domain_get_local_mac (), eth->src_address);
+  mac_address_to_bytes (&gnh->gnh_mac, eth->dst_address);
+
+  gnh->gnh_ai[fproto] =
+    adj_nbr_add_or_lock_w_rewrite (fproto,
+                                  fib_proto_to_link (fproto),
+                                  &gnh->gnh_ip, ge->ge_fwd.gef_itf, rewrite);
+
+  /* the new adjacency is locked first, then the old one is released */
+  adj_unlock (old_ai);
+}
+
+/**
+ * (Re)build the load-balance objects of a redirect rule for one IP
+ * protocol. Rules with any other action are left untouched.
+ *
+ * @param gui    index of the rule
+ * @param fproto IP protocol to build the load-balance for
+ */
+static void
+gbp_contract_mk_lb (index_t gui, fib_protocol_t fproto)
+{
+  load_balance_path_t *paths = NULL;
+  gbp_policy_node_t pnode;
+  gbp_next_hop_t *gnh;
+  dpo_proto_t dproto;
+  gbp_rule_t *gu;
+  u32 ii;
+
+  /* graph node the load-balance is stacked from, per policy node */
+  u32 policy_nodes[] = {
+    [GBP_POLICY_NODE_L2] = gbp_policy_port_node.index,
+    [GBP_POLICY_NODE_IP4] = ip4_gbp_policy_dpo_node.index,
+    [GBP_POLICY_NODE_IP6] = ip6_gbp_policy_dpo_node.index,
   };
+
+  GBP_CONTRACT_DBG ("..mk-lb: %U", format_gbp_rule, gui);
+
+  gu = gbp_rule_get (gui);
+  dproto = fib_proto_to_dpo (fproto);
+
+  if (GBP_RULE_REDIRECT != gu->gu_action)
+    return;
+
+  /* refresh the adjacencies of every next-hop, for both IP protocols */
+  vec_foreach_index (ii, gu->gu_nhs)
+  {
+    gnh = gbp_next_hop_get (gu->gu_nhs[ii]);
+
+    gbp_contract_mk_adj (gnh, FIB_PROTOCOL_IP4);
+    gbp_contract_mk_adj (gnh, FIB_PROTOCOL_IP6);
+  }
+
+  FOR_EACH_GBP_POLICY_NODE (pnode)
+  {
+    vec_validate (paths, vec_len (gu->gu_nhs) - 1);
+
+    /* one path of weight 1 per next-hop, via its adjacency for fproto */
+    vec_foreach_index (ii, gu->gu_nhs)
+    {
+      gnh = gbp_next_hop_get (gu->gu_nhs[ii]);
+
+      paths[ii].path_index = FIB_NODE_INDEX_INVALID;
+      paths[ii].path_weight = 1;
+      dpo_set (&paths[ii].path_dpo, DPO_ADJACENCY,
+              dproto, gnh->gnh_ai[fproto]);
+    }
+
+    // FIXME get algo and sticky bit from contract LB algo
+    /* the load-balance is only created the first time round */
+    if (!dpo_id_is_valid (&gu->gu_dpo[pnode][fproto]))
+      {
+       dpo_id_t dpo = DPO_INVALID;
+
+       dpo_set (&dpo, DPO_LOAD_BALANCE, dproto,
+                load_balance_create (vec_len (paths),
+                                     dproto, IP_FLOW_HASH_DEFAULT));
+       dpo_stack_from_node (policy_nodes[pnode],
+                            &gu->gu_dpo[pnode][fproto], &dpo);
+       dpo_reset (&dpo);
+      }
+
+    load_balance_multipath_update (&gu->gu_dpo[pnode][fproto],
+                                  paths, LOAD_BALANCE_FLAG_NONE);
+    vec_free (paths);
+  }
+}
+
+/**
+ * Build the load-balance objects of one rule for each IP protocol.
+ */
+static void
+gbp_contract_mk_one_lb (index_t gui)
+{
+  fib_protocol_t fproto;
+
+  FOR_EACH_FIB_IP_PROTOCOL (fproto)
+  {
+    gbp_contract_mk_lb (gui, fproto);
+  }
+}
+
+/**
+ * Resolve a next-hop by sourcing the endpoint it forwards via.
+ *
+ * @param gui  index of the rule that owns the next-hop
+ * @param gnhi index of the next-hop
+ * @return the result of gbp_endpoint_update_and_lock
+ */
+static int
+gbp_contract_next_hop_resolve (index_t gui, index_t gnhi)
+{
+  gbp_bridge_domain_t *gbd;
+  gbp_next_hop_t *gnh;
+  ip46_address_t *ips;
+  int rv;
+
+  ips = NULL;
+  gnh = gbp_next_hop_get (gnhi);
+  gbd = gbp_bridge_domain_get (gnh->gnh_bd);
+
+  gnh->gnh_gu = gui;
+  vec_add1 (ips, gnh->gnh_ip);
+
+  /*
+   * source the endpoint this contract needs to forward via.
+   * give forwarding details via the spine proxy. if this EP is known
+   * to us, then since we source here with a low priority, the learned
+   * info will take precedence.
+   */
+  rv = gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_RR,
+                                    gbd->gb_uu_fwd_sw_if_index,
+                                    ips,
+                                    &gnh->gnh_mac,
+                                    gnh->gnh_bd, gnh->gnh_rd, EPG_INVALID,
+                                    GBP_ENDPOINT_FLAG_NONE, NULL, NULL,
+                                    &gnh->gnh_ge);
+
+  if (0 == rv)
+    {
+      /* become a child of the endpoint; gnh_sibling is kept for removal */
+      gnh->gnh_sibling = gbp_endpoint_child_add (gnh->gnh_ge,
+                                                gbp_next_hop_fib_type, gnhi);
+    }
+
+  GBP_CONTRACT_DBG ("..resolve: %d: %d: %U", gui, gnhi, format_gbp_next_hop,
+                   gnhi);
+
+  vec_free (ips);
+  return (rv);
+}
+
+/**
+ * Resolve every next-hop of one rule. The result of each individual
+ * resolution is not checked.
+ */
+static void
+gbp_contract_rule_resolve (index_t gui)
+{
+  gbp_rule_t *gu = gbp_rule_get (gui);
+  u32 ii;
+
+  GBP_CONTRACT_DBG ("..resolve: %U", format_gbp_rule, gui);
+
+  vec_foreach_index (ii, gu->gu_nhs)
+  {
+    gbp_contract_next_hop_resolve (gui, gu->gu_nhs[ii]);
+  }
+}
+
+/**
+ * Resolve each rule in a vector of rule indices.
+ */
+static void
+gbp_contract_resolve (index_t * guis)
+{
+  u32 ii;
+
+  vec_foreach_index (ii, guis)
+  {
+    gbp_contract_rule_resolve (guis[ii]);
+  }
+}
+
+/**
+ * Build the load-balance objects for each rule in a vector of rule
+ * indices.
+ */
+static void
+gbp_contract_mk_lbs (index_t * guis)
+{
+  u32 ii;
+
+  vec_foreach_index (ii, guis)
+  {
+    gbp_contract_mk_one_lb (guis[ii]);
+  }
+}
+
+int
+gbp_contract_update (epg_id_t src_epg,
+                    epg_id_t dst_epg, u32 acl_index, index_t * rules)
+{
+  gbp_main_t *gm = &gbp_main;
+  u32 *acl_vec = NULL;
+  gbp_contract_t *gc;
+  index_t gci;
   uword *p;
 
   gbp_contract_key_t key = {
@@ -48,59 +438,74 @@ gbp_contract_update (epg_id_t src_epg, epg_id_t dst_epg, u32 acl_index)
   p = hash_get (gbp_contract_db.gc_hash, key.as_u32);
   if (p != NULL)
     {
-      value.as_u64 = p[0];
+      gci = p[0];
+      gc = gbp_contract_get (gci);
+      gbp_contract_rules_free (gc->gc_rules);
+      gbp_main.acl_plugin.put_lookup_context_index (gc->gc_lc_index);
+      gc->gc_rules = NULL;
     }
   else
     {
-      value.gc_lc_index =
-       gm->acl_plugin.get_lookup_context_index (gm->gbp_acl_user_id, src_epg,
-                                                dst_epg);
-      value.gc_acl_index = acl_index;
-      hash_set (gbp_contract_db.gc_hash, key.as_u32, value.as_u64);
+      pool_get_zero (gbp_contract_pool, gc);
+      gc->gc_key = key;
+      gci = gc - gbp_contract_pool;
+      hash_set (gbp_contract_db.gc_hash, key.as_u32, gci);
     }
 
-  if (value.gc_lc_index == ~0)
-    return;
-  vec_add1 (acl_vec, acl_index);
-  gm->acl_plugin.set_acl_vec_for_context (value.gc_lc_index, acl_vec);
+  GBP_CONTRACT_DBG ("update: %U", format_gbp_contract, gci);
+
+  gc->gc_rules = rules;
+  gbp_contract_resolve (gc->gc_rules);
+  gbp_contract_mk_lbs (gc->gc_rules);
+
+  gc->gc_acl_index = acl_index;
+  gc->gc_lc_index =
+    gm->acl_plugin.get_lookup_context_index (gm->gbp_acl_user_id,
+                                            src_epg, dst_epg);
+
+  vec_add1 (acl_vec, gc->gc_acl_index);
+  gm->acl_plugin.set_acl_vec_for_context (gc->gc_lc_index, acl_vec);
   vec_free (acl_vec);
+
+  return (0);
 }
 
+/**
+ * Delete the contract between a pair of EPGs.
+ *
+ * If found, the contract's rules are freed, its ACL lookup context is
+ * put back, and it is removed from the hash and the pool.
+ *
+ * @return 0 on success, VNET_API_ERROR_NO_SUCH_ENTRY if there is no
+ *         contract for the pair
+ */
-void
+int
 gbp_contract_delete (epg_id_t src_epg, epg_id_t dst_epg)
 {
-  gbp_main_t *gm = &gbp_main;
-  uword *p;
-  gbp_contract_value_t value;
   gbp_contract_key_t key = {
     .gck_src = src_epg,
     .gck_dst = dst_epg,
   };
+  gbp_contract_t *gc;
+  uword *p;
 
   p = hash_get (gbp_contract_db.gc_hash, key.as_u32);
   if (p != NULL)
     {
-      value.as_u64 = p[0];
-      gm->acl_plugin.put_lookup_context_index (value.gc_lc_index);
+      gc = gbp_contract_get (p[0]);
+
+      gbp_contract_rules_free (gc->gc_rules);
+      gbp_main.acl_plugin.put_lookup_context_index (gc->gc_lc_index);
+
+      hash_unset (gbp_contract_db.gc_hash, key.as_u32);
+      pool_put (gbp_contract_pool, gc);
+
+      return (0);
     }
-  hash_unset (gbp_contract_db.gc_hash, key.as_u32);
+
+  return (VNET_API_ERROR_NO_SUCH_ENTRY);
 }
 
 void
 gbp_contract_walk (gbp_contract_cb_t cb, void *ctx)
 {
-  gbp_contract_key_t key;
-  gbp_contract_value_t value;
+  gbp_contract_t *gc;
 
   /* *INDENT-OFF* */
-  hash_foreach(key.as_u32, value.as_u64, gbp_contract_db.gc_hash,
+  pool_foreach(gc, gbp_contract_pool,
   ({
-    gbp_contract_t gbpc = {
-      .gc_key = key,
-      .gc_value = value,
-    };
-
-    if (!cb(&gbpc, ctx))
+    if (!cb(gc, ctx))
       break;
   }));
   /* *INDENT-ON* */
@@ -137,7 +542,7 @@ gbp_contract_cli (vlib_main_t * vm,
 
   if (add)
     {
-      gbp_contract_update (src_epg_id, dst_epg_id, acl_index);
+      gbp_contract_update (src_epg_id, dst_epg_id, acl_index, NULL);
     }
   else
     {
@@ -164,21 +569,49 @@ VLIB_CLI_COMMAND (gbp_contract_cli_node, static) =
 };
 /* *INDENT-ON* */
 
+/**
+ * Format function taking a gbp_contract_key_t pointer: prints the
+ * source and destination EPG as "{src,dst}".
+ */
+static u8 *
+format_gbp_contract_key (u8 * s, va_list * args)
+{
+  gbp_contract_key_t *gck = va_arg (*args, gbp_contract_key_t *);
+
+  return (format (s, "{%d,%d}", gck->gck_src, gck->gck_dst));
+}
+
+/**
+ * Format function taking a contract index: prints the key and ACL
+ * index, followed by one line per rule.
+ */
+u8 *
+format_gbp_contract (u8 * s, va_list * args)
+{
+  index_t gci = va_arg (*args, index_t);
+  gbp_contract_t *gc = gbp_contract_get (gci);
+  u32 ii;
+
+  s = format (s, "%U: acl-index:%d",
+             format_gbp_contract_key, &gc->gc_key, gc->gc_acl_index);
+
+  vec_foreach_index (ii, gc->gc_rules)
+  {
+    index_t gui = gc->gc_rules[ii];
+
+    s = format (s, "\n    %d: %U", gui, format_gbp_rule, gui);
+  }
+
+  return (s);
+}
+
 static clib_error_t *
 gbp_contract_show (vlib_main_t * vm,
                   unformat_input_t * input, vlib_cli_command_t * cmd)
 {
-  gbp_contract_key_t key;
-  gbp_contract_value_t value;
+  index_t gci;
 
   vlib_cli_output (vm, "Contracts:");
 
   /* *INDENT-OFF* */
-  hash_foreach (key.as_u32, value.as_u64, gbp_contract_db.gc_hash,
-  {
-    vlib_cli_output (vm, "  {%d,%d} -> %d", key.gck_src,
-                     key.gck_dst, value.gc_acl_index);
-  });
+  pool_foreach_index (gci, gbp_contract_pool,
+  ({
+    vlib_cli_output (vm, "  [%d] %U", gci, format_gbp_contract, gci);
+  }));
   /* *INDENT-ON* */
 
   return (NULL);
@@ -199,6 +632,63 @@ VLIB_CLI_COMMAND (gbp_contract_show_node, static) = {
 };
 /* *INDENT-ON* */
 
+/**
+ * FIB node VFT callback: get the fib_node_t embedded in the next-hop
+ * at the given pool index.
+ */
+static fib_node_t *
+gbp_next_hop_get_node (fib_node_index_t index)
+{
+  return (&gbp_next_hop_get (index)->gnh_node);
+}
+
+/**
+ * FIB node VFT callback for the last lock going away.
+ * Asserts unconditionally, i.e. it is not expected to be called.
+ */
+static void
+gbp_next_hop_last_lock_gone (fib_node_t * node)
+{
+  ASSERT (0);
+}
+
+/**
+ * Convert a fib_node_t back to the next-hop that contains it.
+ * The cast relies on gnh_node being the first member of gbp_next_hop_t.
+ */
+static gbp_next_hop_t *
+gbp_next_hop_from_fib_node (fib_node_t * node)
+{
+  ASSERT (gbp_next_hop_fib_type == node->fn_type);
+  return ((gbp_next_hop_t *) node);
+}
+
+/**
+ * FIB node VFT back-walk callback: rebuild the load-balance objects of
+ * the rule that owns the notified next-hop, then continue the walk.
+ */
+static fib_node_back_walk_rc_t
+gbp_next_hop_back_walk_notify (fib_node_t * node,
+                              fib_node_back_walk_ctx_t * ctx)
+{
+  gbp_contract_mk_one_lb (gbp_next_hop_from_fib_node (node)->gnh_gu);
+
+  return (FIB_NODE_BACK_WALK_CONTINUE);
+}
+
+/*
+ * The FIB graph node virtual function table for GBP next-hops;
+ * registered in gbp_contract_init to obtain gbp_next_hop_fib_type
+ */
+static const fib_node_vft_t gbp_next_hop_vft = {
+  .fnv_get = gbp_next_hop_get_node,
+  .fnv_last_lock = gbp_next_hop_last_lock_gone,
+  .fnv_back_walk = gbp_next_hop_back_walk_notify,
+  // .fnv_mem_show = fib_path_memory_show,
+};
+
+/**
+ * Init function: register the contract log class and the FIB node
+ * type used by next-hops.
+ */
+static clib_error_t *
+gbp_contract_init (vlib_main_t * vm)
+{
+  gc_logger = vlib_log_register_class ("gbp", "con");
+  gbp_next_hop_fib_type = fib_node_register_new_type (&gbp_next_hop_vft);
+
+  return (NULL);
+}
+
+VLIB_INIT_FUNCTION (gbp_contract_init);
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 121192c..c107351 100644 (file)
@@ -37,24 +37,69 @@ typedef struct gbp_contract_key_t_
   };
 } gbp_contract_key_t;
 
-/**
- * The value for an Contract
- */
-typedef struct gbp_contract_value_t_
+typedef struct gbp_next_hop_t_
 {
-  union
-  {
-    struct
-    {
-      /**
-       * lookup context and acl index
-       */
-      u32 gc_lc_index;
-      u32 gc_acl_index;
-    };
-    u64 as_u64;
-  };
-} gbp_contract_value_t;
+  fib_node_t gnh_node;
+  ip46_address_t gnh_ip;
+  mac_address_t gnh_mac;
+  index_t gnh_gu;
+  index_t gnh_bd;
+  index_t gnh_rd;
+  u32 gnh_ge;
+  u32 gnh_sibling;
+  index_t gnh_ai[FIB_PROTOCOL_IP_MAX];
+} gbp_next_hop_t;
+
+#define foreach_gbp_hash_mode   \
+  _(SRC_IP, "src-ip")           \
+  _(DST_IP, "dst-ip")
+
+typedef enum gbp_hash_mode_t_
+{
+#define _(v,s) GBP_HASH_MODE_##v,
+  foreach_gbp_hash_mode
+#undef _
+} gbp_hash_mode_t;
+
+#define foreach_gbp_rule_action   \
+  _(PERMIT,   "permit")           \
+  _(DENY,     "deny")             \
+  _(REDIRECT, "redirect")
+
+typedef enum gbp_rule_action_t_
+{
+#define _(v,s) GBP_RULE_##v,
+  foreach_gbp_rule_action
+#undef _
+} gbp_rule_action_t;
+
+#define foreach_gbp_policy_node   \
+  _(L2, "L2")                     \
+  _(IP4, "ip4")                   \
+  _(IP6, "ip6")
+
+typedef enum gbp_policy_node_t_
+{
+#define _(v,s) GBP_POLICY_NODE_##v,
+  foreach_gbp_policy_node
+#undef _
+} gbp_policy_node_t;
+#define GBP_POLICY_N_NODES (GBP_POLICY_NODE_IP6+1)
+
+#define FOR_EACH_GBP_POLICY_NODE(pnode)         \
+  for (pnode = GBP_POLICY_NODE_L2; pnode < GBP_POLICY_N_NODES; pnode++)
+
+typedef struct gbp_rule_t_
+{
+  gbp_rule_action_t gu_action;
+  gbp_hash_mode_t gu_hash_mode;
+  index_t *gu_nhs;
+
+  /**
+   * DPO of the load-balance object used to redirect
+   */
+  dpo_id_t gu_dpo[GBP_POLICY_N_NODES][FIB_PROTOCOL_IP_MAX];
+} gbp_rule_t;
 
 /**
  * A Group Based Policy Contract.
@@ -67,10 +112,13 @@ typedef struct gbp_contract_t_
    */
   gbp_contract_key_t gc_key;
 
+  u32 gc_acl_index;
+  u32 gc_lc_index;
+
   /**
    * The ACL to apply for packets from the source to the destination EPG
    */
-  gbp_contract_value_t gc_value;
+  index_t *gc_rules;
 } gbp_contract_t;
 
 /**
@@ -84,21 +132,29 @@ typedef struct gbp_contract_db_t_
   uword *gc_hash;
 } gbp_contract_db_t;
 
-extern void gbp_contract_update (epg_id_t src_epg,
-                                epg_id_t dst_epg, u32 acl_index);
-extern void gbp_contract_delete (epg_id_t src_epg, epg_id_t dst_epg);
+extern int gbp_contract_update (epg_id_t src_epg,
+                               epg_id_t dst_epg,
+                               u32 acl_index, index_t * rules);
+extern int gbp_contract_delete (epg_id_t src_epg, epg_id_t dst_epg);
+
+extern index_t gbp_rule_alloc (gbp_rule_action_t action,
+                              gbp_hash_mode_t hash_mode, index_t * nhs);
+extern index_t gbp_next_hop_alloc (const ip46_address_t * ip,
+                                  index_t grd,
+                                  const mac_address_t * mac, index_t gbd);
 
 typedef int (*gbp_contract_cb_t) (gbp_contract_t * gbpe, void *ctx);
 extern void gbp_contract_walk (gbp_contract_cb_t bgpe, void *ctx);
 
+extern u8 *format_gbp_contract (u8 * s, va_list * args);
 
 /**
  * DP functions and databases
  */
 extern gbp_contract_db_t gbp_contract_db;
 
-always_inline u64
-gbp_acl_lookup (gbp_contract_key_t * key)
+always_inline index_t
+gbp_contract_find (gbp_contract_key_t * key)
 {
   uword *p;
 
@@ -107,7 +163,23 @@ gbp_acl_lookup (gbp_contract_key_t * key)
   if (NULL != p)
     return (p[0]);
 
-  return (~0);
+  return (INDEX_INVALID);
+}
+
+extern gbp_contract_t *gbp_contract_pool;
+
+always_inline gbp_contract_t *
+gbp_contract_get (index_t gci)
+{
+  return (pool_elt_at_index (gbp_contract_pool, gci));
+}
+
+extern gbp_rule_t *gbp_rule_pool;
+
+always_inline gbp_rule_t *
+gbp_rule_get (index_t gui)
+{
+  return (pool_elt_at_index (gbp_rule_pool, gui));
 }
 
 #endif
index 47adbb5..e9ced3f 100644 (file)
@@ -31,6 +31,7 @@
 #include <vnet/l2/l2_fib.h>
 #include <vnet/fib/fib_table.h>
 #include <vnet/ip/ip_neighbor.h>
+#include <vnet/fib/fib_walk.h>
 
 static const char *gbp_endpoint_attr_names[] = GBP_ENDPOINT_ATTR_NAMES;
 
@@ -39,6 +40,8 @@ static const char *gbp_endpoint_attr_names[] = GBP_ENDPOINT_ATTR_NAMES;
  */
 gbp_ep_db_t gbp_ep_db;
 
+fib_node_type_t gbp_endpoint_fib_type;
+
 vlib_log_class_t gbp_ep_logger;
 
 #define GBP_ENDPOINT_DBG(...)                           \
@@ -89,7 +92,7 @@ format_gbp_endpoint_flags (u8 * s, va_list * args)
 int
 gbp_endpoint_is_remote (const gbp_endpoint_t * ge)
 {
-  return (ge->ge_flags & GBP_ENDPOINT_FLAG_REMOTE);
+  return (ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE);
 }
 
 static void
@@ -194,264 +197,522 @@ ip46_address_get_type (const ip46_address_t * a)
   return (ip46_address_is_ip4 (a) ? IP46_TYPE_IP4 : IP46_TYPE_IP6);
 }
 
-static ip46_type_t
-ip46_address_get_len (const ip46_address_t * a)
+static int
+gbp_endpoint_ip_is_equal (const fib_prefix_t * fp, const ip46_address_t * ip)
 {
-  return (ip46_address_is_ip4 (a) ? 32 : 128);
+  return (ip46_address_is_equal (ip, &fp->fp_addr));
+}
+
+static void
+gbp_endpoint_ips_update (gbp_endpoint_t * ge,
+                        const ip46_address_t * ips,
+                        const gbp_route_domain_t * grd)
+{
+  const ip46_address_t *ip;
+  index_t gei, grdi;
+
+  gei = gbp_endpoint_index (ge);
+  grdi = gbp_route_domain_index (grd);
+
+  ASSERT ((ge->ge_key.gek_grd == INDEX_INVALID) ||
+         (ge->ge_key.gek_grd == grdi));
+
+  vec_foreach (ip, ips)
+  {
+    if (~0 == vec_search_with_function (ge->ge_key.gek_ips, ip,
+                                       gbp_endpoint_ip_is_equal))
+      {
+       fib_prefix_t *pfx;
+
+       vec_add2 (ge->ge_key.gek_ips, pfx, 1);
+       fib_prefix_from_ip46_addr (ip, pfx);
+
+       gbp_endpoint_add_ip (&pfx->fp_addr,
+                            grd->grd_fib_index[pfx->fp_proto], gei);
+      }
+    ge->ge_key.gek_grd = grdi;
+  }
 }
 
 static gbp_endpoint_t *
-gbp_endpoint_alloc (epg_id_t epg_id,
-                   index_t ggi, u32 sw_if_index, gbp_endpoint_flags_t flags,
-                   const ip46_address_t * tun_src,
-                   const ip46_address_t * tun_dst)
+gbp_endpoint_alloc (const ip46_address_t * ips,
+                   const gbp_route_domain_t * grd,
+                   const mac_address_t * mac,
+                   const gbp_bridge_domain_t * gbd)
 {
   gbp_endpoint_t *ge;
+  index_t gei;
 
   pool_get_zero (gbp_endpoint_pool, ge);
 
-  ge->ge_epg = ggi;
-  ge->ge_epg_id = epg_id;
-  ge->ge_flags = flags;
-  ge->ge_sw_if_index = sw_if_index;
+  fib_node_init (&ge->ge_node, gbp_endpoint_fib_type);
+  gei = gbp_endpoint_index (ge);
+  ge->ge_key.gek_gbd =
+    ge->ge_key.gek_grd = ge->ge_fwd.gef_itf = INDEX_INVALID;
   ge->ge_last_time = vlib_time_now (vlib_get_main ());
+  ge->ge_key.gek_gbd = gbp_bridge_domain_index (gbd);
+
+  if (NULL != mac)
+    {
+      mac_address_copy (&ge->ge_key.gek_mac, mac);
+      gbp_endpoint_add_mac (mac, gbd->gb_bd_index, gei);
+    }
+  gbp_endpoint_ips_update (ge, ips, grd);
+
+  return (ge);
+}
+
+static int
+gbp_endpoint_loc_is_equal (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
+{
+  return (a->gel_src == b->gel_src);
+}
+
+static int
+gbp_endpoint_loc_cmp_for_sort (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
+{
+  return (a->gel_src - b->gel_src);
+}
+
+static gbp_endpoint_loc_t *
+gbp_endpoint_loc_find (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
+{
+  gbp_endpoint_loc_t gel = {
+    .gel_src = src,
+  };
+  u32 pos;
+
+  pos = vec_search_with_function (ge->ge_locs, &gel,
+                                 gbp_endpoint_loc_is_equal);
+
+  if (~0 != pos)
+    return (&ge->ge_locs[pos]);
+
+  return NULL;
+}
+
+static int
+gbp_endpoint_loc_unlock (gbp_endpoint_t * ge, gbp_endpoint_loc_t * gel)
+{
+  u32 pos;
+
+  gel->gel_locks--;
+
+  if (0 == gel->gel_locks)
+    {
+      pos = gel - ge->ge_locs;
+
+      vec_del1 (ge->ge_locs, pos);
+      if (vec_len (ge->ge_locs) > 1)
+       vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
+
+      /* This could be the last lock, so don't access the EP from
+       * this point on */
+      fib_node_unlock (&ge->ge_node);
+
+      return (1);
+    }
+  return (0);
+}
+
+static void
+gbp_endpoint_loc_destroy (gbp_endpoint_loc_t * gel)
+{
+  gbp_endpoint_group_unlock (gel->gel_epg);
+
+  if (gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE)
+    {
+      vxlan_gbp_tunnel_unlock (gel->gel_sw_if_index);
+    }
+}
+
+static gbp_endpoint_loc_t *
+gbp_endpoint_loc_find_or_add (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
+{
+  gbp_endpoint_loc_t gel = {
+    .gel_src = src,
+    .gel_epg = INDEX_INVALID,
+    .gel_sw_if_index = INDEX_INVALID,
+    .gel_locks = 0,
+  };
+  u32 pos;
+
+  pos = vec_search_with_function (ge->ge_locs, &gel,
+                                 gbp_endpoint_loc_is_equal);
+
+  if (~0 == pos)
+    {
+      vec_add1 (ge->ge_locs, gel);
+
+      if (vec_len (ge->ge_locs) > 1)
+       {
+         vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
+
+         pos = vec_search_with_function (ge->ge_locs, &gel,
+                                         gbp_endpoint_loc_is_equal);
+       }
+      else
+       pos = 0;
+
+      /*
+       * it's the sources and children that lock the endpoints
+       */
+      fib_node_lock (&ge->ge_node);
+    }
+
+  return (&ge->ge_locs[pos]);
+}
+
+/**
+ * Find an EP in the L2 (MAC) and L3 (IP) DBs. Every match must be the same
+ * EP: returns 0 with *ge set (NULL if none found), else -1 on a mismatch.
+ */
+static int
+gbp_endpoint_find_for_update (const ip46_address_t * ips,
+                             const gbp_route_domain_t * grd,
+                             const mac_address_t * mac,
+                             const gbp_bridge_domain_t * gbd,
+                             gbp_endpoint_t ** ge)
+{
+  gbp_endpoint_t *l2_ge, *l3_ge, *tmp;
+
+  l2_ge = l3_ge = NULL;
+
+  if (NULL != mac && !mac_address_is_zero (mac))
+    {
+      ASSERT (gbd);
+      l2_ge = gbp_endpoint_find_mac (mac->bytes, gbd->gb_bd_index);
+    }
+  if (NULL != ips && !ip46_address_is_zero (ips))
+    {
+      const ip46_address_t *ip;
+      fib_protocol_t fproto;
+
+      ASSERT (grd);
+      vec_foreach (ip, ips)
+      {
+       fproto = fib_proto_from_ip46 (ip46_address_get_type (ip));
+
+       tmp = gbp_endpoint_find_ip (ip, grd->grd_fib_index[fproto]);
+
+       if (NULL == tmp)
+         /* not found */
+         continue;
+       else if (NULL == l3_ge)
+         /* first match against an IP address */
+         l3_ge = tmp;
+       else if (tmp == l3_ge)
+         /* another match against IP address that is the same endpoint */
+         continue;
+       else
+         {
+           /*
+            * a match against a different endpoint.
+            * this means the KEY of the EP is changing which is not allowed
+            */
+           return (-1);
+         }
+      }
+    }
+
+  if (NULL == l2_ge && NULL == l3_ge)
+    /* not found */
+    *ge = NULL;
+  else if (NULL == l2_ge)
+    /* found at L3 */
+    *ge = l3_ge;
+  else if (NULL == l3_ge)
+    /* found at L2 */
+    *ge = l2_ge;
+  else
+    {
+      /* found both L3 and L2 - they must be the same else the KEY
+       * is changing
+       */
+      if (l2_ge == l3_ge)
+       *ge = l2_ge;
+      else
+       return (-1);
+    }
+
+  return (0);
+}
 
-  gbp_endpoint_group_find_and_lock (epg_id);
+static gbp_endpoint_src_t
+gbp_endpoint_get_best_src (const gbp_endpoint_t * ge)
+{
+  if (0 == vec_len (ge->ge_locs))
+    return (GBP_ENDPOINT_SRC_MAX);
+
+  return (ge->ge_locs[0].gel_src);
+}
+
+static void
+gbp_endpoint_n_learned (int n)
+{
+  gbp_n_learnt_endpoints += n;
+
+  if (n > 0 && 1 == gbp_n_learnt_endpoints)
+    {
+      vlib_process_signal_event (vlib_get_main (),
+                                gbp_scanner_node.index,
+                                GBP_ENDPOINT_SCAN_START, 0);
+    }
+  if (n < 0 && 0 == gbp_n_learnt_endpoints)
+    {
+      vlib_process_signal_event (vlib_get_main (),
+                                gbp_scanner_node.index,
+                                GBP_ENDPOINT_SCAN_STOP, 0);
+    }
+}
+
+static void
+gbp_endpoint_loc_update (gbp_endpoint_loc_t * gel,
+                        u32 sw_if_index,
+                        index_t ggi,
+                        gbp_endpoint_flags_t flags,
+                        const ip46_address_t * tun_src,
+                        const ip46_address_t * tun_dst)
+{
+  int was_learnt, is_learnt;
+
+  gel->gel_locks++;
+  was_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
+  gel->gel_flags = flags;
+  is_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
+
+  gbp_endpoint_n_learned (is_learnt - was_learnt);
 
-  if (gbp_endpoint_is_remote (ge))
+  if (INDEX_INVALID == gel->gel_epg)
+    {
+      gel->gel_epg = ggi;
+      if (INDEX_INVALID != gel->gel_epg)
+       {
+         gbp_endpoint_group_lock (gel->gel_epg);
+       }
+    }
+  else
+    {
+      ASSERT (gel->gel_epg == ggi);
+    }
+
+  if (gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE)
     {
       if (NULL != tun_src)
-       ip46_address_copy (&ge->tun.ge_src, tun_src);
+       ip46_address_copy (&gel->tun.gel_src, tun_src);
       if (NULL != tun_dst)
-       ip46_address_copy (&ge->tun.ge_dst, tun_dst);
+       ip46_address_copy (&gel->tun.gel_dst, tun_dst);
 
       /*
        * the input interface may be the parent GBP-vxlan interface,
        * create a child vlxan-gbp tunnel and use that as the endpoint's
        * interface.
        */
+      if (~0 != gel->gel_sw_if_index)
+       vxlan_gbp_tunnel_unlock (gel->gel_sw_if_index);
+
       switch (gbp_vxlan_tunnel_get_type (sw_if_index))
        {
        case GBP_VXLAN_TEMPLATE_TUNNEL:
-         ge->tun.ge_parent_sw_if_index = sw_if_index;
-         ge->ge_sw_if_index =
+         gel->tun.gel_parent_sw_if_index = sw_if_index;
+         gel->gel_sw_if_index =
            gbp_vxlan_tunnel_clone_and_lock (sw_if_index,
-                                            &ge->tun.ge_src,
-                                            &ge->tun.ge_dst);
+                                            &gel->tun.gel_src,
+                                            &gel->tun.gel_dst);
          break;
        case VXLAN_GBP_TUNNEL:
-         ge->tun.ge_parent_sw_if_index =
+         gel->tun.gel_parent_sw_if_index =
            vxlan_gbp_tunnel_get_parent (sw_if_index);
-         ge->ge_sw_if_index = sw_if_index;
-         vxlan_gbp_tunnel_lock (ge->ge_sw_if_index);
+         gel->gel_sw_if_index = sw_if_index;
+         vxlan_gbp_tunnel_lock (gel->gel_sw_if_index);
          break;
        }
     }
-
-  return (ge);
+  else
+    {
+      gel->gel_sw_if_index = sw_if_index;
+    }
 }
 
-int
-gbp_endpoint_update (u32 sw_if_index,
-                    const ip46_address_t * ips,
-                    const mac_address_t * mac,
-                    epg_id_t epg_id,
-                    gbp_endpoint_flags_t flags,
-                    const ip46_address_t * tun_src,
-                    const ip46_address_t * tun_dst, u32 * handle)
+static void
+gbb_endpoint_fwd_reset (gbp_endpoint_t * ge)
 {
-  gbp_endpoint_group_t *gg;
-  gbp_endpoint_t *ge;
-  index_t ggi, gei;
+  const gbp_route_domain_t *grd;
+  const gbp_bridge_domain_t *gbd;
+  gbp_endpoint_fwd_t *gef;
+  const fib_prefix_t *pfx;
+  index_t *ai;
+  index_t gei;
+
+  gei = gbp_endpoint_index (ge);
+  gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
+  gef = &ge->ge_fwd;
+
+  vec_foreach (pfx, ge->ge_key.gek_ips)
+  {
+    u32 fib_index;
 
-  if (~0 == sw_if_index)
-    return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
+    grd = gbp_route_domain_get (ge->ge_key.gek_grd);
+    fib_index = grd->grd_fib_index[pfx->fp_proto];
 
-  ge = NULL;
-  ggi = gbp_endpoint_group_find_and_lock (epg_id);
+    bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
+                      &pfx->fp_addr, &ge->ge_key.gek_mac, 0);
 
-  if (INDEX_INVALID == ggi)
-    return (VNET_API_ERROR_NO_SUCH_ENTRY);
+    /*
+     * remove a host route
+     */
+    if (gbp_endpoint_is_remote (ge))
+      {
+       fib_table_entry_special_remove (fib_index, pfx, FIB_SOURCE_PLUGIN_HI);
+      }
 
-  gg = gbp_endpoint_group_get (ggi);
+    fib_table_entry_delete (fib_index, pfx, FIB_SOURCE_PLUGIN_LOW);
+  }
+  vec_foreach (ai, gef->gef_adjs)
+  {
+    adj_unlock (*ai);
+  }
 
-  /*
-   * L2 EP
-   */
-  if (NULL != mac && !mac_address_is_zero (mac))
+  if (INDEX_INVALID != gef->gef_itf)
     {
-      /*
-       * find an existing endpoint matching one of the key types
-       */
-      ge = gbp_endpoint_find_mac (mac->bytes, gg->gg_bd_index);
-      if (NULL == ge)
-       {
-         /*
-          * new entry
-          */
-         ge = gbp_endpoint_alloc (epg_id, ggi, sw_if_index, flags,
-                                  tun_src, tun_dst);
-         gei = gbp_endpoint_index (ge);
-         mac_address_copy (&ge->ge_mac, mac);
-
-         ge->ge_itf = gbp_itf_add_and_lock (ge->ge_sw_if_index,
-                                            gg->gg_bd_index);
-
-         gbp_itf_set_l2_input_feature (ge->ge_itf, gei,
-                                       L2INPUT_FEAT_GBP_FWD);
+      l2fib_del_entry (ge->ge_key.gek_mac.bytes,
+                      gbd->gb_bd_index, gef->gef_itf);
+      gbp_itf_set_l2_input_feature (gef->gef_itf, gei, (L2INPUT_FEAT_NONE));
+      gbp_itf_set_l2_output_feature (gef->gef_itf, gei, L2OUTPUT_FEAT_NONE);
 
-         if (gbp_endpoint_is_remote (ge))
-           {
-             gbp_itf_set_l2_output_feature (ge->ge_itf, gei,
-                                            L2OUTPUT_FEAT_GBP_POLICY_MAC);
-           }
-         else
-           {
-             gbp_endpoint_add_itf (ge->ge_sw_if_index, gei);
-             gbp_itf_set_l2_output_feature (ge->ge_itf, gei,
-                                            L2OUTPUT_FEAT_GBP_POLICY_PORT);
-           }
+      gbp_itf_unlock (gef->gef_itf);
+      gef->gef_itf = INDEX_INVALID;
+    }
 
-         gbp_endpoint_add_mac (mac, gg->gg_bd_index, gei);
+  vec_free (gef->gef_adjs);
+}
 
-         l2fib_add_entry (mac->bytes, gg->gg_bd_index, ge->ge_sw_if_index,
-                          L2FIB_ENTRY_RESULT_FLAG_STATIC);
-       }
-      else
-       {
-         /*
-          * update existing entry..
-          */
-         ge->ge_flags = flags;
-         gei = gbp_endpoint_index (ge);
-         goto out;
-       }
-    }
+static void
+gbb_endpoint_fwd_recalc (gbp_endpoint_t * ge)
+{
+  const gbp_route_domain_t *grd;
+  const gbp_bridge_domain_t *gbd;
+  const gbp_endpoint_group_t *gg;
+  gbp_endpoint_loc_t *gel;
+  gbp_endpoint_fwd_t *gef;
+  const fib_prefix_t *pfx;
+  index_t gei;
 
   /*
-   * L3 EP
+   * locations are sorted in source priority order
    */
-  if (NULL != ips && !ip46_address_is_zero (ips))
+  gei = gbp_endpoint_index (ge);
+  gel = &ge->ge_locs[0];
+  gef = &ge->ge_fwd;
+  gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
+
+  gef->gef_flags = gel->gel_flags;
+
+  if (INDEX_INVALID != gel->gel_epg)
     {
-      const ip46_address_t *ip;
-      fib_protocol_t fproto;
-      gbp_endpoint_t *l3_ge;
-      u32 ii;
+      gg = gbp_endpoint_group_get (gel->gel_epg);
+      gef->gef_epg_id = gg->gg_id;
+    }
+  else
+    {
+      gg = NULL;
+    }
 
-      /*
-       * look for a matching EP by any of the address
-       * An EP's IP addresses cannot change so we can search based on
-       * the first
-       */
-      fproto = fib_proto_from_ip46 (ip46_address_get_type (&ips[0]));
+  gef->gef_itf = gbp_itf_add_and_lock (gel->gel_sw_if_index,
+                                      gbd->gb_bd_index);
+
+  if (!mac_address_is_zero (&ge->ge_key.gek_mac))
+    {
+      gbp_itf_set_l2_input_feature (gef->gef_itf, gei, L2INPUT_FEAT_GBP_FWD);
 
-      l3_ge = gbp_endpoint_find_ip (&ips[0],
-                                   gbp_endpoint_group_get_fib_index (gg,
-                                                                     fproto));
-      if (NULL == l3_ge)
+      if (gbp_endpoint_is_remote (ge))
        {
-         if (NULL == ge)
-           {
-             ge = gbp_endpoint_alloc (epg_id, ggi, sw_if_index, flags,
-                                      tun_src, tun_dst);
-             ge->ge_itf = gbp_itf_add_and_lock (sw_if_index, ~0);
-           }
-         /* else
-          *   L2 EP with IPs
-          */
+         gbp_itf_set_l2_output_feature (gef->gef_itf, gei,
+                                        L2OUTPUT_FEAT_GBP_POLICY_MAC);
        }
       else
        {
-         /* modify */
-         ge = l3_ge;
-         ge->ge_flags = flags;
-         gei = gbp_endpoint_index (ge);
-         goto out;
+         gbp_endpoint_add_itf (gef->gef_itf, gei);
+         gbp_itf_set_l2_output_feature (gef->gef_itf, gei,
+                                        L2OUTPUT_FEAT_GBP_POLICY_PORT);
        }
+      l2fib_add_entry (ge->ge_key.gek_mac.bytes,
+                      gbd->gb_bd_index,
+                      gef->gef_itf, L2FIB_ENTRY_RESULT_FLAG_STATIC);
+    }
 
-      gei = gbp_endpoint_index (ge);
-      ge->ge_ips = ips;
-      vec_validate (ge->ge_adjs, vec_len (ips) - 1);
-
-      vec_foreach_index (ii, ge->ge_ips)
+  vec_foreach (pfx, ge->ge_key.gek_ips)
+  {
+    ethernet_header_t *eth;
+    u32 ip_sw_if_index;
+    u32 fib_index;
+    u8 *rewrite;
+    index_t ai;
+
+    rewrite = NULL;
+    grd = gbp_route_domain_get (ge->ge_key.gek_grd);
+    fib_index = grd->grd_fib_index[pfx->fp_proto];
+
+    bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
+                      &pfx->fp_addr, &ge->ge_key.gek_mac, 1);
+
+    /*
+     * add a host route via the EPG's BVI we need this because the
+     * adj fib does not install, due to cover refinement check, since
+     * the BVI's prefix is /32
+     */
+    vec_validate (rewrite, sizeof (*eth) - 1);
+    eth = (ethernet_header_t *) rewrite;
+
+    eth->type = clib_host_to_net_u16 ((pfx->fp_proto == FIB_PROTOCOL_IP4 ?
+                                      ETHERNET_TYPE_IP4 :
+                                      ETHERNET_TYPE_IP6));
+
+    if (gbp_endpoint_is_remote (ge))
       {
-       ethernet_header_t *eth;
-       ip46_type_t ip_type;
-       u32 ip_sw_if_index;
-       u8 *rewrite;
-
-       rewrite = NULL;
-       ip = &ge->ge_ips[ii];
-       ip_type = ip46_address_get_type (ip);
-       fproto = fib_proto_from_ip46 (ip_type);
-
-       bd_add_del_ip_mac (gg->gg_bd_index, ip_type, ip, &ge->ge_mac, 1);
-
-       // FIXME - check error
-       gbp_endpoint_add_ip (ip,
-                            gbp_endpoint_group_get_fib_index (gg, fproto),
-                            gei);
-
        /*
-        * add a host route via the EPG's BVI we need this because the
-        * adj fib does not install, due to cover refinement check, since
-        * the BVI's prefix is /32
+        * for dynamic EPs we must add the IP adjacency via the learned
+        * tunnel since the BD will not contain the EP's MAC since it was
+        * L3 learned. The dst MAC address used is the 'BD's MAC'.
         */
-       fib_prefix_t pfx = {
-         .fp_proto = fproto,
-         .fp_len = ip46_address_get_len (ip),
-         .fp_addr = *ip,
-       };
-       vec_validate (rewrite, sizeof (*eth) - 1);
-       eth = (ethernet_header_t *) rewrite;
-
-       eth->type = clib_host_to_net_u16 ((fproto == FIB_PROTOCOL_IP4 ?
-                                          ETHERNET_TYPE_IP4 :
-                                          ETHERNET_TYPE_IP6));
-
-       if (gbp_endpoint_is_remote (ge))
-         {
-           /*
-            * for dynamic EPs we msut add the IP adjacency via the learned
-            * tunnel since the BD will not contain the EP's MAC since it was
-            * L3 learned. The dst MAC address used is the 'BD's MAC'.
-            */
-           ip_sw_if_index = ge->ge_sw_if_index;
+       ip_sw_if_index = gef->gef_itf;
 
-           mac_address_to_bytes (gbp_route_domain_get_local_mac (),
-                                 eth->src_address);
-           mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
-                                 eth->dst_address);
-         }
-       else
-         {
-           /*
-            * for the static EPs we add the IP adjacency via the BVI
-            * knowing that the BD has the MAC address to route to and
-            * that policy will be applied on egress to the EP's port
-            */
-           ip_sw_if_index = gbp_endpoint_group_get_bvi (gg);
+       mac_address_to_bytes (gbp_route_domain_get_local_mac (),
+                             eth->src_address);
+       mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
+                             eth->dst_address);
+      }
+    else
+      {
+       /*
+        * for the static EPs we add the IP adjacency via the BVI
+        * knowing that the BD has the MAC address to route to and
+        * that policy will be applied on egress to the EP's port
+        */
+       ip_sw_if_index = gbd->gb_bvi_sw_if_index;
 
-           clib_memcpy (eth->src_address,
-                        vnet_sw_interface_get_hw_address (vnet_get_main (),
-                                                          ip_sw_if_index),
-                        sizeof (eth->src_address));
-           mac_address_to_bytes (&ge->ge_mac, eth->dst_address);
-         }
+       clib_memcpy (eth->src_address,
+                    vnet_sw_interface_get_hw_address (vnet_get_main (),
+                                                      ip_sw_if_index),
+                    sizeof (eth->src_address));
+       mac_address_to_bytes (&ge->ge_key.gek_mac, eth->dst_address);
+      }
 
-       fib_table_entry_path_add
-         (gbp_endpoint_group_get_fib_index (gg, fproto),
-          &pfx, FIB_SOURCE_PLUGIN_LOW,
-          FIB_ENTRY_FLAG_NONE,
-          fib_proto_to_dpo (fproto), ip, ip_sw_if_index,
-          ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
+    fib_table_entry_path_add (fib_index, pfx,
+                             FIB_SOURCE_PLUGIN_LOW,
+                             FIB_ENTRY_FLAG_NONE,
+                             fib_proto_to_dpo (pfx->fp_proto),
+                             &pfx->fp_addr, ip_sw_if_index,
+                             ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
 
-       ge->ge_adjs[ii] = adj_nbr_add_or_lock_w_rewrite (fproto,
-                                                        fib_proto_to_link
-                                                        (fproto), ip,
-                                                        ip_sw_if_index,
-                                                        rewrite);
+    ai = adj_nbr_add_or_lock_w_rewrite (pfx->fp_proto,
+                                       fib_proto_to_link (pfx->fp_proto),
+                                       &pfx->fp_addr,
+                                       ip_sw_if_index, rewrite);
+    vec_add1 (gef->gef_adjs, ai);
 
+    if (NULL != gg)
+      {
        if (gbp_endpoint_is_remote (ge))
          {
            dpo_id_t policy_dpo = DPO_INVALID;
@@ -460,13 +721,13 @@ gbp_endpoint_update (u32 sw_if_index,
             * interpose a policy DPO from the endpoint so that policy
             * is applied
             */
-           gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (fproto),
+           gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (pfx->fp_proto),
                                        gg->gg_id, ~0, &policy_dpo);
 
-           fib_table_entry_special_dpo_add
-             (gbp_endpoint_group_get_fib_index (gg, fproto),
-              &pfx,
-              FIB_SOURCE_PLUGIN_HI, FIB_ENTRY_FLAG_INTERPOSE, &policy_dpo);
+           fib_table_entry_special_dpo_add (fib_index, pfx,
+                                            FIB_SOURCE_PLUGIN_HI,
+                                            FIB_ENTRY_FLAG_INTERPOSE,
+                                            &policy_dpo);
          }
 
        /*
@@ -474,68 +735,148 @@ gbp_endpoint_update (u32 sw_if_index,
         * that if this EP has moved from some other place in the
         * 'fabric', upstream devices are informed
         */
-       if (!(gbp_endpoint_is_remote (ge)) && ~0 != gg->gg_uplink_sw_if_index)
+       if (!gbp_endpoint_is_remote (ge) && ~0 != gg->gg_uplink_sw_if_index)
          {
-           gbp_endpoint_add_itf (sw_if_index, gei);
-           if (ip46_address_is_ip4 (ip))
+           gbp_endpoint_add_itf (gef->gef_itf, gei);
+           if (FIB_PROTOCOL_IP4 == pfx->fp_proto)
              send_ip4_garp_w_addr (vlib_get_main (),
-                                   &ip->ip4, gg->gg_uplink_sw_if_index);
+                                   &pfx->fp_addr.ip4,
+                                   gg->gg_uplink_sw_if_index);
            else
              send_ip6_na_w_addr (vlib_get_main (),
-                                 &ip->ip6, gg->gg_uplink_sw_if_index);
+                                 &pfx->fp_addr.ip6,
+                                 gg->gg_uplink_sw_if_index);
          }
       }
+  }
+
+  if (!gbp_endpoint_is_remote (ge))
+    {
+      /*
+       * non-remote endpoints (i.e. those not arriving on iVXLAN
+       * tunnels) need to be classified based on the input interface.
+       * We enable the GBP-FWD feature only if the group has an uplink
+       * interface (on which the GBP-FWD feature would send UU traffic).
+       */
+      l2input_feat_masks_t feats = L2INPUT_FEAT_GBP_SRC_CLASSIFY;
+
+      if (NULL != gg && ~0 != gg->gg_uplink_sw_if_index)
+       feats |= L2INPUT_FEAT_GBP_FWD;
+      gbp_itf_set_l2_input_feature (gef->gef_itf, gei, feats);
     }
 
-  if (NULL == ge)
-    return (0);
+  /*
+   * update children with the new forwarding info
+   */
+  fib_node_back_walk_ctx_t bw_ctx = {
+    .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
+    .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
+  };
+
+  fib_walk_sync (gbp_endpoint_fib_type, gei, &bw_ctx);
+}
+
+int
+gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
+                             u32 sw_if_index,
+                             const ip46_address_t * ips,
+                             const mac_address_t * mac,
+                             index_t gbdi, index_t grdi, epg_id_t epg_id,
+                             gbp_endpoint_flags_t flags,
+                             const ip46_address_t * tun_src,
+                             const ip46_address_t * tun_dst, u32 * handle)
+{
+  gbp_bridge_domain_t *gbd;
+  gbp_endpoint_group_t *gg;
+  gbp_endpoint_src_t best;
+  gbp_route_domain_t *grd;
+  gbp_endpoint_loc_t *gel;
+  gbp_endpoint_t *ge;
+  index_t ggi, gei;
+  int rv;
+
+  if (~0 == sw_if_index)
+    return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
+
+  ge = NULL;
+  gg = NULL;
 
   /*
-   * count the number of dynamic entries and kick off the scanner
-   * process is this is our first.
+   * we need to determine the bridge-domain, either from the EPG or
+   * the BD passed
    */
-  if (gbp_endpoint_is_remote (ge))
+  if (EPG_INVALID != epg_id)
     {
-      gbp_n_learnt_endpoints++;
+      ggi = gbp_endpoint_group_find (epg_id);
 
-      if (1 == gbp_n_learnt_endpoints)
-       {
-         vlib_process_signal_event (vlib_get_main (),
-                                    gbp_scanner_node.index,
-                                    GBP_ENDPOINT_SCAN_START, 0);
-       }
+      if (INDEX_INVALID == ggi)
+       return (VNET_API_ERROR_NO_SUCH_ENTRY);
+
+      gg = gbp_endpoint_group_get (ggi);
+      gbdi = gg->gg_gbd;
+      grdi = gg->gg_rd;
+    }
+  else
+    {
+      if (INDEX_INVALID == gbdi)
+       return (VNET_API_ERROR_NO_SUCH_ENTRY);
+      if (INDEX_INVALID == grdi)
+       return (VNET_API_ERROR_NO_SUCH_FIB);
+      ggi = INDEX_INVALID;
+    }
+
+  gbd = gbp_bridge_domain_get (gbdi);
+  grd = gbp_route_domain_get (grdi);
+  rv = gbp_endpoint_find_for_update (ips, grd, mac, gbd, &ge);
+
+  if (0 != rv)
+    return (rv);
+
+  if (NULL == ge)
+    {
+      ge = gbp_endpoint_alloc (ips, grd, mac, gbd);
     }
   else
+    {
+      gbp_endpoint_ips_update (ge, ips, grd);
+    }
+
+  best = gbp_endpoint_get_best_src (ge);
+  gei = gbp_endpoint_index (ge);
+  gel = gbp_endpoint_loc_find_or_add (ge, src);
+
+  gbp_endpoint_loc_update (gel, sw_if_index, ggi, flags, tun_src, tun_dst);
+
+  if (src <= best)
     {
       /*
-       * non-remote endpoints (i.e. those not arriving on iVXLAN
-       * tunnels) need to be classifed based on the the input interface.
-       * We enable the GBP-FWD feature only is the group has an uplink
-       * interface (on which the GBP-FWD feature would send UU traffic).
+       * either the best source has been updated or we have a new best source
+       */
+      gbb_endpoint_fwd_reset (ge);
+      gbb_endpoint_fwd_recalc (ge);
+    }
+  else
+    {
+      /*
+       * an update to a lower priority source, so we need do nothing
        */
-      l2input_feat_masks_t feats = L2INPUT_FEAT_GBP_SRC_CLASSIFY;
-
-      if (~0 != gg->gg_uplink_sw_if_index)
-       feats |= L2INPUT_FEAT_GBP_FWD;
-      gbp_itf_set_l2_input_feature (ge->ge_itf, gbp_endpoint_index (ge),
-                                   feats);
     }
-out:
 
   if (handle)
-    *handle = (ge - gbp_endpoint_pool);
+    *handle = gei;
 
-  gbp_endpoint_group_unlock (ggi);
   GBP_ENDPOINT_INFO ("update: %U", format_gbp_endpoint, gei);
 
   return (0);
 }
 
 void
-gbp_endpoint_delete (index_t gei)
+gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gei)
 {
-  gbp_endpoint_group_t *gg;
+  gbp_endpoint_loc_t *gel, gel_copy;
+  gbp_endpoint_src_t best;
   gbp_endpoint_t *ge;
+  int removed;
 
   if (pool_is_free_index (gbp_endpoint_pool, gei))
     return;
@@ -543,86 +884,90 @@ gbp_endpoint_delete (index_t gei)
   GBP_ENDPOINT_INFO ("delete: %U", format_gbp_endpoint, gei);
 
   ge = gbp_endpoint_get (gei);
-  gg = gbp_endpoint_group_get (ge->ge_epg);
-
-  gbp_endpoint_del_mac (&ge->ge_mac, gg->gg_bd_index);
-  l2fib_del_entry (ge->ge_mac.bytes, gg->gg_bd_index, ge->ge_sw_if_index);
-  gbp_itf_set_l2_input_feature (ge->ge_itf, gei, (L2INPUT_FEAT_NONE));
-  gbp_itf_set_l2_output_feature (ge->ge_itf, gei, L2OUTPUT_FEAT_NONE);
-
-  if (NULL != ge->ge_ips)
-    {
-      const ip46_address_t *ip;
-      index_t *ai;
-
-      vec_foreach (ai, ge->ge_adjs)
-      {
-       adj_unlock (*ai);
-      }
-      vec_foreach (ip, ge->ge_ips)
-      {
-       fib_protocol_t fproto;
-       ip46_type_t ip_type;
 
-       ip_type = ip46_address_get_type (ip);
-       fproto = fib_proto_from_ip46 (ip_type);
+  gel = gbp_endpoint_loc_find (ge, src);
 
-       gbp_endpoint_del_ip (ip,
-                            gbp_endpoint_group_get_fib_index (gg, fproto));
-
-       bd_add_del_ip_mac (gg->gg_bd_index, ip_type, ip, &ge->ge_mac, 0);
+  if (NULL == gel)
+    return;
 
-       /*
-        * remove a host route via the EPG's BVI
-        */
-       fib_prefix_t pfx = {
-         .fp_proto = fproto,
-         .fp_len = ip46_address_get_len (ip),
-         .fp_addr = *ip,
-       };
+  /*
+   * lock the EP so we can control when it is deleted
+   */
+  fib_node_lock (&ge->ge_node);
+  best = gbp_endpoint_get_best_src (ge);
 
-       if (gbp_endpoint_is_remote (ge))
-         {
-           fib_table_entry_special_remove
-             (gbp_endpoint_group_get_fib_index (gg, fproto),
-              &pfx, FIB_SOURCE_PLUGIN_HI);
-         }
+  /*
+   * copy the location info since we'll lose it when it's removed from
+   * the vector
+   */
+  clib_memcpy (&gel_copy, gel, sizeof (gel_copy));
 
-       fib_table_entry_path_remove
-         (gbp_endpoint_group_get_fib_index (gg, fproto),
-          &pfx, FIB_SOURCE_PLUGIN_LOW,
-          fib_proto_to_dpo (fproto), ip,
-          (gbp_endpoint_is_remote (ge) ?
-           ge->ge_sw_if_index :
-           gbp_endpoint_group_get_bvi (gg)),
-          ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
-      }
-    }
+  /*
+   * remove the source we no longer need
+   */
+  removed = gbp_endpoint_loc_unlock (ge, gel);
 
-  if (ge->ge_flags & GBP_ENDPOINT_FLAG_LEARNT)
+  if (src == best)
     {
-      gbp_n_learnt_endpoints--;
-
-      if (0 == gbp_n_learnt_endpoints)
+      /*
+       * we have removed the old best source => recalculate fwding
+       */
+      if (0 == vec_len (ge->ge_locs))
+       {
+         /*
+          * if there are no more sources left, then we need only release
+          * the fwding resources held and then this EP is gawn.
+          */
+         gbb_endpoint_fwd_reset (ge);
+       }
+      else
        {
-         vlib_process_signal_event (vlib_get_main (),
-                                    gbp_scanner_node.index,
-                                    GBP_ENDPOINT_SCAN_STOP, 0);
+         /*
+          * else there are more sources. release the old and get new
+          * fwding objects
+          */
+         gbb_endpoint_fwd_reset (ge);
+         gbb_endpoint_fwd_recalc (ge);
        }
     }
+  /*
+   * else
+   *  we removed a lower priority source so we need to do nothing
+   */
 
-  gbp_itf_unlock (ge->ge_itf);
-  if (gbp_endpoint_is_remote (ge))
-    {
-      vxlan_gbp_tunnel_unlock (ge->ge_sw_if_index);
-    }
-  gbp_endpoint_group_unlock (ge->ge_epg);
-  pool_put (gbp_endpoint_pool, ge);
+  /*
+   * clear up any resources held by the source
+   */
+  if (removed)
+    gbp_endpoint_loc_destroy (&gel_copy);
+
+  /*
+   * remove the lock taken above
+   */
+  fib_node_unlock (&ge->ge_node);
+  /*
+   *  We may have removed the last source and so this EP is now TOAST
+   *  DO NOTHING BELOW HERE
+   */
+}
+
+/**
+ * Attach a child to the FIB node of the endpoint identified by gei.
+ *
+ * @param gei   index of the endpoint that becomes the parent
+ * @param type  FIB node type of the child
+ * @param index index of the child node
+ * @return whatever fib_node_child_add returns; it is the value to hand
+ *         back to gbp_endpoint_child_remove as 'sibling'
+ */
+u32
+gbp_endpoint_child_add (index_t gei,
+                       fib_node_type_t type, fib_node_index_t index)
+{
+  u32 sibling;
+
+  sibling = fib_node_child_add (gbp_endpoint_fib_type, gei, type, index);
+
+  return (sibling);
+}
+
+/**
+ * Detach a child from the FIB node of the endpoint identified by gei.
+ *
+ * @param gei     index of the endpoint the child was added to
+ * @param sibling the value obtained from gbp_endpoint_child_add
+ */
+void
+gbp_endpoint_child_remove (index_t gei, u32 sibling)
+{
+  /*
+   * this function returns void, so the helper is invoked as a plain
+   * statement rather than being used as a return expression
+   */
+  fib_node_child_remove (gbp_endpoint_fib_type, gei, sibling);
 }
 
 typedef struct gbp_endpoint_flush_ctx_t_
 {
   u32 sw_if_index;
+  gbp_endpoint_src_t src;
   index_t *geis;
 } gbp_endpoint_flush_ctx_t;
 
@@ -630,12 +975,13 @@ static walk_rc_t
 gbp_endpoint_flush_cb (index_t gei, void *args)
 {
   gbp_endpoint_flush_ctx_t *ctx = args;
+  gbp_endpoint_loc_t *gel;
   gbp_endpoint_t *ge;
 
   ge = gbp_endpoint_get (gei);
+  gel = gbp_endpoint_loc_find (ge, ctx->src);
 
-  if (gbp_endpoint_is_remote (ge) &&
-      ctx->sw_if_index == ge->tun.ge_parent_sw_if_index)
+  if ((NULL != gel) && ctx->sw_if_index == gel->tun.gel_parent_sw_if_index)
     {
       vec_add1 (ctx->geis, gei);
     }
@@ -647,18 +993,23 @@ gbp_endpoint_flush_cb (index_t gei, void *args)
  * remove all learnt endpoints using the interface
  */
 void
-gbp_endpoint_flush (u32 sw_if_index)
+gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index)
 {
   gbp_endpoint_flush_ctx_t ctx = {
     .sw_if_index = sw_if_index,
+    .src = src,
   };
   index_t *gei;
 
+  GBP_ENDPOINT_INFO ("flush: %U %U",
+                    format_gbp_endpoint_src, src,
+                    format_vnet_sw_if_index_name, vnet_get_main (),
+                    sw_if_index);
   gbp_endpoint_walk (gbp_endpoint_flush_cb, &ctx);
 
   vec_foreach (gei, ctx.geis)
   {
-    gbp_endpoint_delete (*gei);
+    gbp_endpoint_unlock (src, *gei);
   }
 
   vec_free (ctx.geis);
@@ -724,8 +1075,12 @@ gbp_endpoint_cli (vlib_main_t * vm,
        return clib_error_return (0, "EPG-ID must be specified");
 
       rv =
-       gbp_endpoint_update (sw_if_index, ips, &mac, epg_id,
-                            GBP_ENDPOINT_FLAG_NONE, NULL, NULL, &handle);
+       gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
+                                     sw_if_index, ips, &mac,
+                                     INDEX_INVALID, INDEX_INVALID,
+                                     epg_id,
+                                     GBP_ENDPOINT_FLAG_NONE,
+                                     NULL, NULL, &handle);
 
       if (rv)
        return clib_error_return (0, "GBP Endpoint update returned %d", rv);
@@ -737,7 +1092,7 @@ gbp_endpoint_cli (vlib_main_t * vm,
       if (INDEX_INVALID == handle)
        return clib_error_return (0, "handle must be specified");
 
-      gbp_endpoint_delete (handle);
+      gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, handle);
     }
 
   vec_free (ips);
@@ -745,7 +1100,6 @@ gbp_endpoint_cli (vlib_main_t * vm,
   return (NULL);
 }
 
-
 /*?
  * Configure a GBP Endpoint
  *
@@ -762,33 +1116,100 @@ VLIB_CLI_COMMAND (gbp_endpoint_cli_node, static) = {
 /* *INDENT-ON* */
 
 u8 *
-format_gbp_endpoint (u8 * s, va_list * args)
+format_gbp_endpoint_src (u8 * s, va_list * args)
 {
-  index_t gei = va_arg (*args, index_t);
-  const ip46_address_t *ip;
-  gbp_endpoint_t *ge;
+  gbp_endpoint_src_t action = va_arg (*args, gbp_endpoint_src_t);
 
-  ge = gbp_endpoint_get (gei);
+  switch (action)
+    {
+#define _(v,a) case GBP_ENDPOINT_SRC_##v: return (format (s, "%s", a));
+      foreach_gbp_endpoint_src
+#undef _
+    }
+
+  return (format (s, "unknown"));
+}
+
+/**
+ * Format function for an endpoint's forwarding state.
+ * Expects one va_arg: a pointer to the gbp_endpoint_fwd_t to print.
+ * The flags are only appended when they differ from GBP_ENDPOINT_FLAG_NONE.
+ */
+static u8 *
+format_gbp_endpoint_fwd (u8 * s, va_list * args)
+{
+  const gbp_endpoint_fwd_t *fwd = va_arg (*args, gbp_endpoint_fwd_t *);
+
+  s = format (s, "fwd:");
+  s = format (s, "\n   itf:[%U]", format_gbp_itf, fwd->gef_itf);
+
+  /* nothing more to show when no flag is set */
+  if (GBP_ENDPOINT_FLAG_NONE == fwd->gef_flags)
+    return (s);
+
+  return (format (s, " flags:%U", format_gbp_endpoint_flags, fwd->gef_flags));
+}
+
+static u8 *
+format_gbp_endpoint_key (u8 * s, va_list * args)
+{
+  gbp_endpoint_key_t *gek = va_arg (*args, gbp_endpoint_key_t *);
+  const fib_prefix_t *pfx;
 
-  s = format (s, "[@%d] ", gei);
-  s = format (s, "IPs:[");
+  s = format (s, "ips:[");
 
-  vec_foreach (ip, ge->ge_ips)
+  vec_foreach (pfx, gek->gek_ips)
   {
-    s = format (s, "%U, ", format_ip46_address, ip, IP46_TYPE_ANY);
+    s = format (s, "%U, ", format_fib_prefix, pfx);
   }
   s = format (s, "]");
 
-  s = format (s, " MAC:%U", format_mac_address_t, &ge->ge_mac);
-  s = format (s, " EPG-ID:%d", ge->ge_epg_id);
-  if (GBP_ENDPOINT_FLAG_NONE != ge->ge_flags)
+  s = format (s, " mac:%U", format_mac_address_t, &gek->gek_mac);
+
+  return (s);
+}
+
+static u8 *
+format_gbp_endpoint_loc (u8 * s, va_list * args)
+{
+  gbp_endpoint_loc_t *gel = va_arg (*args, gbp_endpoint_loc_t *);
+
+  s = format (s, "%U", format_gbp_endpoint_src, gel->gel_src);
+  s =
+    format (s, "\n    %U", format_vnet_sw_if_index_name, vnet_get_main (),
+           gel->gel_sw_if_index);
+  s = format (s, " EPG:%d", gel->gel_epg);
+
+  if (GBP_ENDPOINT_FLAG_NONE != gel->gel_flags)
+    {
+      s = format (s, " flags:%U", format_gbp_endpoint_flags, gel->gel_flags);
+    }
+  if (GBP_ENDPOINT_FLAG_REMOTE & gel->gel_flags)
     {
-      s = format (s, " flags:%U", format_gbp_endpoint_flags, ge->ge_flags);
+      s = format (s, " tun:[");
+      s = format (s, "parent:%U", format_vnet_sw_if_index_name,
+                 vnet_get_main (), gel->tun.gel_parent_sw_if_index);
+      s = format (s, " {%U,%U}]",
+                 format_ip46_address, &gel->tun.gel_src, IP46_TYPE_ANY,
+                 format_ip46_address, &gel->tun.gel_dst, IP46_TYPE_ANY);
     }
 
-  s = format (s, " itf:[%U]", format_gbp_itf, ge->ge_itf);
+  return (s);
+}
+
+/**
+ * Format function for an endpoint.
+ * Expects one va_arg: the index_t of the endpoint in the endpoint pool.
+ * Prints the key, the last-seen time, one line per source location and
+ * finally the forwarding state.
+ */
+u8 *
+format_gbp_endpoint (u8 * s, va_list * args)
+{
+  index_t gei = va_arg (*args, index_t);
+  gbp_endpoint_t *ge;
+  u32 n_locs, i;
+
+  ge = gbp_endpoint_get (gei);
+
+  s = format (s, "[@%d] %U", gei, format_gbp_endpoint_key, &ge->ge_key);
   s = format (s, " last-time:[%f]", ge->ge_last_time);
 
+  /* walk the locations in the order they are stored in the vector */
+  n_locs = vec_len (ge->ge_locs);
+  for (i = 0; i < n_locs; i++)
+    {
+      s = format (s, "\n  %U", format_gbp_endpoint_loc, &ge->ge_locs[i]);
+    }
+  s = format (s, "\n  %U", format_gbp_endpoint_fwd, &ge->ge_fwd);
+
   return s;
 }
 
@@ -895,16 +1316,16 @@ VLIB_CLI_COMMAND (gbp_endpoint_show_node, static) = {
 static void
 gbp_endpoint_check (index_t gei, f64 start_time)
 {
+  gbp_endpoint_loc_t *gel;
   gbp_endpoint_t *ge;
 
   ge = gbp_endpoint_get (gei);
+  gel = gbp_endpoint_loc_find (ge, GBP_ENDPOINT_SRC_DP);
 
-  GBP_ENDPOINT_DBG ("scan at:%f -> %U", start_time, format_gbp_endpoint, gei);
-
-  if ((ge->ge_flags & GBP_ENDPOINT_FLAG_LEARNT) &&
+  if ((NULL != gel) &&
       ((start_time - ge->ge_last_time) > GBP_ENDPOINT_INACTIVE_TIME))
     {
-      gbp_endpoint_delete (gei);
+      gbp_endpoint_unlock (GBP_ENDPOINT_SRC_DP, gei);
     }
 }
 
@@ -1031,12 +1452,77 @@ gbp_endpoint_scan_threshold (void)
   return (GBP_ENDPOINT_INACTIVE_TIME);
 }
 
-#define GBP_EP_HASH_NUM_BUCKETS (2 * 1024)
-#define GBP_EP_HASH_MEMORY_SIZE (1 << 20)
+/**
+ * FIB node 'get' callback (installed as .fnv_get in gbp_endpoint_vft):
+ * map an endpoint index to the fib_node_t embedded in that endpoint.
+ */
+static fib_node_t *
+gbp_endpoint_get_node (fib_node_index_t index)
+{
+  return (&(gbp_endpoint_get (index)->ge_node));
+}
+
+/**
+ * Recover the endpoint that embeds the given FIB node.
+ * Asserts that the node's type is the registered GBP endpoint type.
+ */
+static gbp_endpoint_t *
+gbp_endpoint_from_fib_node (fib_node_t * node)
+{
+  gbp_endpoint_t *ge;
+
+  ASSERT (gbp_endpoint_fib_type == node->fn_type);
+
+  /* a plain cast: ge_node is the first member of gbp_endpoint_t */
+  ge = (gbp_endpoint_t *) node;
+
+  return (ge);
+}
+
+/**
+ * FIB node 'last lock' callback (installed as .fnv_last_lock in
+ * gbp_endpoint_vft). Removes the endpoint from the MAC and IP
+ * data-bases and returns it to the endpoint pool.
+ *
+ * @param node the fib_node_t embedded in the endpoint being destroyed
+ */
+static void
+gbp_endpoint_last_lock_gone (fib_node_t * node)
+{
+  const gbp_bridge_domain_t *gbd;
+  const gbp_route_domain_t *grd;
+  const fib_prefix_t *pfx;
+  gbp_endpoint_t *ge;
+
+  ge = gbp_endpoint_from_fib_node (node);
+
+  /* every source must already have been removed */
+  ASSERT (0 == vec_len (ge->ge_locs));
+
+  /*
+   * we have removed the last source. this EP is toast
+   */
+  if (INDEX_INVALID != ge->ge_key.gek_gbd)
+    {
+      /*
+       * resolve the bridge-domain only once its index is known to be
+       * valid; the lookup must not be attempted with INDEX_INVALID
+       */
+      gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
+      gbp_endpoint_del_mac (&ge->ge_key.gek_mac, gbd->gb_bd_index);
+    }
+  vec_foreach (pfx, ge->ge_key.gek_ips)
+  {
+    /* the route-domain is only dereferenced when there is an IP to remove */
+    grd = gbp_route_domain_get (ge->ge_key.gek_grd);
+    gbp_endpoint_del_ip (&pfx->fp_addr, grd->grd_fib_index[pfx->fp_proto]);
+  }
+  /*
+   * NOTE(review): ge_key.gek_ips is not freed here - confirm that the
+   * vector is released or reused elsewhere, otherwise it leaks.
+   */
+  pool_put (gbp_endpoint_pool, ge);
+}
+
+/**
+ * FIB node back-walk callback (installed as .fnv_back_walk in
+ * gbp_endpoint_vft). Reaching it trips an assertion; the walk is told
+ * to continue regardless.
+ */
+static fib_node_back_walk_rc_t
+gbp_endpoint_back_walk_notify (fib_node_t * node,
+                              fib_node_back_walk_ctx_t * ctx)
+{
+  const fib_node_back_walk_rc_t rc = FIB_NODE_BACK_WALK_CONTINUE;
+
+  /* a back-walk reaching an endpoint is not expected */
+  ASSERT (0);
+
+  return (rc);
+}
+
+/*
+ * The GBP endpoint's FIB graph node virtual function table
+ */
+static const fib_node_vft_t gbp_endpoint_vft = {
+  .fnv_get = gbp_endpoint_get_node,
+  .fnv_last_lock = gbp_endpoint_last_lock_gone,
+  .fnv_back_walk = gbp_endpoint_back_walk_notify,
+  // .fnv_mem_show = fib_path_memory_show,
+};
 
 static clib_error_t *
 gbp_endpoint_init (vlib_main_t * vm)
 {
+#define GBP_EP_HASH_NUM_BUCKETS (2 * 1024)
+#define GBP_EP_HASH_MEMORY_SIZE (1 << 20)
+
   clib_bihash_init_24_8 (&gbp_ep_db.ged_by_ip_rd,
                         "GBP Endpoints - IP/RD",
                         GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
@@ -1046,6 +1532,7 @@ gbp_endpoint_init (vlib_main_t * vm)
                         GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
 
   gbp_ep_logger = vlib_log_register_class ("gbp", "ep");
+  gbp_endpoint_fib_type = fib_node_register_new_type (&gbp_endpoint_vft);
 
   return (NULL);
 }
index bd157c9..d56e91d 100644 (file)
@@ -54,65 +54,152 @@ typedef enum gbp_endpoint_flags_t_
 extern u8 *format_gbp_endpoint_flags (u8 * s, va_list * args);
 
 /**
- * A Group Based Policy Endpoint.
- * This is typically a VM or container. If the endpoint is local (i.e. on
- * the same compute node as VPP) then there is one interface per-endpoint.
- * If the EP is remote,e.g. reachable over a [vxlan] tunnel, then there
- * will be multiple EPs reachable over the tunnel and they can be distinguished
- * via either their MAC or IP Address[es].
+ * Sources of Endpoints in priority order. The best (lowest value) source
+ * provides the forwarding information
  */
-typedef struct gbp_endpoint_t_
+#define foreach_gbp_endpoint_src    \
+  _(CP, "control-plane")            \
+  _(DP, "data-plane")               \
+  _(RR, "recursive-resolution")
+
+typedef enum gbp_endpoint_src_t_
+{
+#define _(v,s) GBP_ENDPOINT_SRC_##v,
+  foreach_gbp_endpoint_src
+#undef _
+} gbp_endpoint_src_t;
+
+#define GBP_ENDPOINT_SRC_MAX (GBP_ENDPOINT_SRC_RR+1)
+
+extern u8 *format_gbp_endpoint_src (u8 * s, va_list * args);
+
+/**
+ * This is the identity of an endpoint, as such it is information
+ * about an endpoint that is idempotent.
+ * The ID is used to add the EP into the various data-bases for retrieval.
+ */
+typedef struct gbp_endpoint_key_t_
 {
   /**
-   * The interface on which the EP is connected
+   * A vector of ip addresses that belong to the endpoint.
+   * Together with the route EPG's RD this forms the EP's L3 key
    */
-  index_t ge_itf;
-  u32 ge_sw_if_index;
+  fib_prefix_t *gek_ips;
 
   /**
-   * A vector of ip addresses that below to the endpoint
+   * MAC address of the endpoint.
+   * Together with the route EPG's BD this forms the EP's L2 key
    */
-  const ip46_address_t *ge_ips;
+  mac_address_t gek_mac;
 
   /**
-   * MAC address of the endpoint
+   * Index of the Bridge-Domain
    */
-  mac_address_t ge_mac;
+  index_t gek_gbd;
 
   /**
-   * Index of the Endpoint's Group
+   * Index of the Route-Domain
    */
-  index_t ge_epg;
+  index_t gek_grd;
+} gbp_endpoint_key_t;
+
+/**
+ * Information about the location of the endpoint provided by a source
+ * of endpoints
+ */
+typedef struct gbp_endpoint_loc_t_
+{
+  /**
+   * The source providing this location information
+   */
+  gbp_endpoint_src_t gel_src;
 
   /**
-   * Endpoint Group's ID
+   * The interface on which the EP is connected
    */
-  index_t ge_epg_id;
+  u32 gel_sw_if_index;
 
   /**
    * Endpoint flags
    */
-  gbp_endpoint_flags_t ge_flags;
+  gbp_endpoint_flags_t gel_flags;
 
   /**
-   * The L3 adj, if created
+   * Endpoint Group.
    */
-  index_t *ge_adjs;
+  index_t gel_epg;
 
   /**
-   * The last time a packet from seen from this end point
+   * number of times this source has locked this
    */
-  f64 ge_last_time;
+  u32 gel_locks;
 
   /**
    * Tunnel info for remote endpoints
    */
   struct
   {
-    u32 ge_parent_sw_if_index;
-    ip46_address_t ge_src;
-    ip46_address_t ge_dst;
+    u32 gel_parent_sw_if_index;
+    ip46_address_t gel_src;
+    ip46_address_t gel_dst;
   } tun;
+} gbp_endpoint_loc_t;
+
+/**
+ * An endpoint's current forwarding state: the GBP interface, the L3
+ * adjacencies, the cached EPG ID and the endpoint flags (gef_flags).
+ */
+typedef struct gbp_endpoint_fwd_t_
+{
+  /**
+   * The interface on which the EP is connected.
+   * This is a GBP interface index (it is printed with format_gbp_itf),
+   * not a sw_if_index.
+   */
+  index_t gef_itf;
+
+  /**
+   * The L3 adj[s], if created
+   */
+  index_t *gef_adjs;
+
+  /**
+   * Endpoint Group's ID. cached for fast DP access.
+   */
+  epg_id_t gef_epg_id;
+
+  gbp_endpoint_flags_t gef_flags;
+} gbp_endpoint_fwd_t;
+
+/**
+ * A Group Based Policy Endpoint.
+ * This is typically a VM or container. If the endpoint is local (i.e. on
+ * the same compute node as VPP) then there is one interface per-endpoint.
+ * If the EP is remote, e.g. reachable over a [vxlan] tunnel, then there
+ * will be multiple EPs reachable over the tunnel and they can be distinguished
+ * via either their MAC or IP Address[es].
+ *
+ * An endpoint holds its key, the location information supplied by each
+ * source and its current forwarding state (ge_fwd).
+ */
+typedef struct gbp_endpoint_t_
+{
+  /**
+   * A FIB node that allows the tracking of children.
+   * It is the first member: gbp_endpoint_from_fib_node casts a
+   * fib_node_t pointer directly to a gbp_endpoint_t pointer.
+   */
+  fib_node_t ge_node;
+
+  /**
+   * The key/ID of this EP
+   */
+  gbp_endpoint_key_t ge_key;
+
+  /**
+   * Location information provided by the various sources.
+   * These are sorted based on source priority.
+   */
+  gbp_endpoint_loc_t *ge_locs;
+
+  gbp_endpoint_fwd_t ge_fwd;
+
+  /**
+   * The last time a packet was seen from this endpoint
+   */
+  f64 ge_last_time;
 } gbp_endpoint_t;
 
 extern u8 *format_gbp_endpoint (u8 * s, va_list * args);
@@ -127,22 +214,31 @@ typedef struct gbp_ep_by_ip_itf_db_t_
   clib_bihash_16_8_t ged_by_mac_bd;
 } gbp_ep_db_t;
 
-extern int gbp_endpoint_update (u32 sw_if_index,
-                               const ip46_address_t * ip,
-                               const mac_address_t * mac,
-                               epg_id_t epg_id,
-                               gbp_endpoint_flags_t flags,
-                               const ip46_address_t * tun_src,
-                               const ip46_address_t * tun_dst, u32 * handle);
-extern void gbp_endpoint_delete (index_t gbpei);
+extern int gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
+                                        u32 sw_if_index,
+                                        const ip46_address_t * ip,
+                                        const mac_address_t * mac,
+                                        index_t gbd, index_t grd,
+                                        epg_id_t epg_id,
+                                        gbp_endpoint_flags_t flags,
+                                        const ip46_address_t * tun_src,
+                                        const ip46_address_t * tun_dst,
+                                        u32 * handle);
+extern void gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gbpei);
+extern u32 gbp_endpoint_child_add (index_t gei,
+                                  fib_node_type_t type,
+                                  fib_node_index_t index);
+extern void gbp_endpoint_child_remove (index_t gei, u32 sibling);
 
 typedef walk_rc_t (*gbp_endpoint_cb_t) (index_t gbpei, void *ctx);
 extern void gbp_endpoint_walk (gbp_endpoint_cb_t cb, void *ctx);
 extern void gbp_endpoint_scan (vlib_main_t * vm);
 extern f64 gbp_endpoint_scan_threshold (void);
 extern int gbp_endpoint_is_remote (const gbp_endpoint_t * ge);
+extern int gbp_endpoint_is_learnt (const gbp_endpoint_t * ge);
+
 
-extern void gbp_endpoint_flush (u32 sw_if_index);
+extern void gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index);
 
 /**
  * DP functions and databases
index ee4af2c..834f865 100644 (file)
@@ -44,7 +44,7 @@ gbp_endpoint_group_get (index_t i)
   return (pool_elt_at_index (gbp_endpoint_group_pool, i));
 }
 
-static void
+void
 gbp_endpoint_group_lock (index_t i)
 {
   gbp_endpoint_group_t *gg;
@@ -66,21 +66,6 @@ gbp_endpoint_group_find (epg_id_t epg_id)
   return (INDEX_INVALID);
 }
 
-index_t
-gbp_endpoint_group_find_and_lock (epg_id_t epg_id)
-{
-  uword *p;
-
-  p = hash_get (gbp_endpoint_group_db.gg_hash, epg_id);
-
-  if (NULL != p)
-    {
-      gbp_endpoint_group_lock (p[0]);
-      return p[0];
-    }
-  return (INDEX_INVALID);
-}
-
 int
 gbp_endpoint_group_add_and_lock (epg_id_t epg_id,
                                 u32 bd_id, u32 rd_id, u32 uplink_sw_if_index)
@@ -165,6 +150,9 @@ gbp_endpoint_group_unlock (index_t ggi)
 {
   gbp_endpoint_group_t *gg;
 
+  if (INDEX_INVALID == ggi)
+    return;
+
   gg = gbp_endpoint_group_get (ggi);
 
   gg->gg_locks--;
@@ -227,7 +215,7 @@ gbp_endpoint_group_get_bd_id (const gbp_endpoint_group_t * gg)
 }
 
 index_t
-gbp_endpoint_group_get_fib_index (gbp_endpoint_group_t * gg,
+gbp_endpoint_group_get_fib_index (const gbp_endpoint_group_t * gg,
                                  fib_protocol_t fproto)
 {
   const gbp_route_domain_t *grd;
@@ -237,16 +225,6 @@ gbp_endpoint_group_get_fib_index (gbp_endpoint_group_t * gg,
   return (grd->grd_fib_index[fproto]);
 }
 
-u32
-gbp_endpoint_group_get_bvi (gbp_endpoint_group_t * gg)
-{
-  const gbp_bridge_domain_t *gb;
-
-  gb = gbp_bridge_domain_get (gg->gg_gbd);
-
-  return (gb->gb_bvi_sw_if_index);
-}
-
 void
 gbp_endpoint_group_walk (gbp_endpoint_group_cb_t cb, void *ctx)
 {
index 7116a05..763a80e 100644 (file)
@@ -74,16 +74,15 @@ extern int gbp_endpoint_group_add_and_lock (epg_id_t epg_id,
                                            u32 bd_id,
                                            u32 rd_id,
                                            u32 uplink_sw_if_index);
-extern index_t gbp_endpoint_group_find_and_lock (epg_id_t epg_id);
 extern index_t gbp_endpoint_group_find (epg_id_t epg_id);
 extern int gbp_endpoint_group_delete (epg_id_t epg_id);
 extern void gbp_endpoint_group_unlock (index_t index);
+extern void gbp_endpoint_group_lock (index_t index);
 extern u32 gbp_endpoint_group_get_bd_id (const gbp_endpoint_group_t *);
 
 extern gbp_endpoint_group_t *gbp_endpoint_group_get (index_t i);
-extern index_t gbp_endpoint_group_get_fib_index (gbp_endpoint_group_t * gg,
-                                                fib_protocol_t fproto);
-extern u32 gbp_endpoint_group_get_bvi (gbp_endpoint_group_t * gg);
+extern index_t gbp_endpoint_group_get_fib_index (const gbp_endpoint_group_t *
+                                                gg, fib_protocol_t fproto);
 
 typedef int (*gbp_endpoint_group_cb_t) (gbp_endpoint_group_t * gbpe,
                                        void *ctx);
index cd6a15d..762b463 100644 (file)
@@ -110,11 +110,14 @@ gbp_learn_l2_cp (const gbp_learn_l2_t * gl2)
    * flip the source and dst, since that's how it was received, this API
    * takes how it's sent
    */
-  gbp_endpoint_update (gl2->sw_if_index, ips,
-                      &gl2->mac, gl2->epg,
-                      (GBP_ENDPOINT_FLAG_LEARNT |
-                       GBP_ENDPOINT_FLAG_REMOTE),
-                      &gl2->outer_dst, &gl2->outer_src, NULL);
+  gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_DP,
+                               gl2->sw_if_index, ips,
+                               &gl2->mac, INDEX_INVALID,
+                               INDEX_INVALID, gl2->epg,
+                               (GBP_ENDPOINT_FLAG_LEARNT |
+                                GBP_ENDPOINT_FLAG_REMOTE),
+                               &gl2->outer_dst, &gl2->outer_src, NULL);
+  vec_free (ips);
 }
 
 static void
@@ -273,7 +276,7 @@ gbp_learn_l2 (vlib_main_t * vm,
          /*
           * check for new EP or a moved EP
           */
-         if (NULL == ge0 || ge0->ge_sw_if_index != sw_if_index0)
+         if (NULL == ge0 || ge0->ge_fwd.gef_itf != sw_if_index0)
 
            {
              /*
@@ -415,10 +418,13 @@ gbp_learn_l3_cp (const gbp_learn_l3_t * gl3)
 
   vec_add1 (ips, gl3->ip);
 
-  gbp_endpoint_update (gl3->sw_if_index, ips, NULL, gl3->epg,
-                      (GBP_ENDPOINT_FLAG_REMOTE |
-                       GBP_ENDPOINT_FLAG_LEARNT),
-                      &gl3->outer_dst, &gl3->outer_src, NULL);
+  gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_DP,
+                               gl3->sw_if_index, ips, NULL,
+                               INDEX_INVALID, INDEX_INVALID, gl3->epg,
+                               (GBP_ENDPOINT_FLAG_REMOTE |
+                                GBP_ENDPOINT_FLAG_LEARNT),
+                               &gl3->outer_dst, &gl3->outer_src, NULL);
+  vec_free (ips);
 }
 
 static void
index 6d84a99..3b204ff 100644 (file)
@@ -14,6 +14,7 @@
  */
 
 #include <plugins/gbp/gbp.h>
+#include <plugins/gbp/gbp_policy_dpo.h>
 
 #include <vnet/vxlan-gbp/vxlan_gbp_packet.h>
 
@@ -67,6 +68,44 @@ typedef struct gbp_policy_trace_t_
   u32 allowed;
 } gbp_policy_trace_t;
 
+/**
+ * Translate an ethertype into a DPO protocol.
+ *
+ * @param etype ethertype in network byte order (it is converted to host
+ *              order before being compared)
+ * @return DPO_PROTO_IP4 or DPO_PROTO_IP6 for the IP ethertypes,
+ *         DPO_PROTO_NONE for anything else
+ */
+always_inline dpo_proto_t
+ethertype_to_dpo_proto (u16 etype)
+{
+  const u16 host_etype = clib_net_to_host_u16 (etype);
+
+  if (ETHERNET_TYPE_IP4 == host_etype)
+    return (DPO_PROTO_IP4);
+
+  if (ETHERNET_TYPE_IP6 == host_etype)
+    return (DPO_PROTO_IP6);
+
+  return (DPO_PROTO_NONE);
+}
+
+/**
+ * Apply a redirect rule to a packet in the L2 policy node.
+ * Strips the ethernet header, stores the rule's DPO index in the buffer
+ * for the next node and clears the buffer's IP flow hash.
+ *
+ * @param gu the matched rule
+ * @param b0 the buffer; its current data must point at the ethernet header
+ * @return the next-node index taken from the rule's DPO
+ */
+always_inline u32
+gbp_rule_l2_redirect (const gbp_rule_t * gu, vlib_buffer_t * b0)
+{
+  /* grab the ethernet header before the buffer is advanced past it */
+  const ethernet_header_t *eth0 = vlib_buffer_get_current (b0);
+  const dpo_id_t *dpo;
+
+  /* pop the ethernet header to prepare for L3 rewrite */
+  vlib_buffer_advance (b0, vnet_buffer (b0)->l2.l2_len);
+
+  /*
+   * NOTE(review): a non-IP ethertype maps to DPO_PROTO_NONE, which is
+   * then used as an index into gu_dpo - confirm the array is sized for
+   * it or that only IP packets can reach this point.
+   */
+  dpo = &gu->gu_dpo[GBP_POLICY_NODE_L2][ethertype_to_dpo_proto (eth0->type)];
+
+  /* save the LB index for the next node and reset the IP flow hash
+   * so it's recalculated */
+  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo->dpoi_index;
+  vnet_buffer (b0)->ip.flow_hash = 0;
+
+  return (dpo->dpoi_next_node);
+}
+
 static uword
 gbp_policy_inline (vlib_main_t * vm,
                   vlib_node_runtime_t * node,
@@ -93,12 +132,12 @@ gbp_policy_inline (vlib_main_t * vm,
          const gbp_endpoint_t *ge0;
          gbp_policy_next_t next0;
          gbp_contract_key_t key0;
-         gbp_contract_value_t value0 = {
-           .as_u64 = ~0,
-         };
+         gbp_contract_t *gc0;
          u32 bi0, sw_if_index0;
          vlib_buffer_t *b0;
+         index_t gci0;
 
+         gc0 = NULL;
          next0 = GBP_POLICY_NEXT_DENY;
          bi0 = from[0];
          to_next[0] = bi0;
@@ -136,7 +175,7 @@ gbp_policy_inline (vlib_main_t * vm,
                                         vnet_buffer (b0)->l2.bd_index);
 
          if (NULL != ge0)
-           key0.gck_dst = ge0->ge_epg_id;
+           key0.gck_dst = ge0->ge_fwd.gef_epg_id;
          else
            /* If you cannot determine the destination EP then drop */
            goto trace;
@@ -161,9 +200,9 @@ gbp_policy_inline (vlib_main_t * vm,
                }
              else
                {
-                 value0.as_u64 = gbp_acl_lookup (&key0);
+                 gci0 = gbp_contract_find (&key0);
 
-                 if (~0 != value0.gc_lc_index)
+                 if (INDEX_INVALID != gci0)
                    {
                      fa_5tuple_opaque_t pkt_5tuple0;
                      u8 action0 = 0;
@@ -173,6 +212,7 @@ gbp_policy_inline (vlib_main_t * vm,
                      u16 ether_type0;
                      u8 is_ip60 = 0;
 
+                     gc0 = gbp_contract_get (gci0);
                      l2_len0 = vnet_buffer (b0)->l2.l2_len;
                      h0 = vlib_buffer_get_current (b0);
 
@@ -185,14 +225,14 @@ gbp_policy_inline (vlib_main_t * vm,
                       */
                      acl_plugin_fill_5tuple_inline (gm->
                                                     acl_plugin.p_acl_main,
-                                                    value0.gc_lc_index, b0,
+                                                    gc0->gc_lc_index, b0,
                                                     is_ip60,
                                                     /* is_input */ 0,
                                                     /* is_l2_path */ 1,
                                                     &pkt_5tuple0);
                      acl_plugin_match_5tuple_inline (gm->
                                                      acl_plugin.p_acl_main,
-                                                     value0.gc_lc_index,
+                                                     gc0->gc_lc_index,
                                                      &pkt_5tuple0, is_ip60,
                                                      &action0, &acl_pos_p0,
                                                      &acl_match_p0,
@@ -201,17 +241,30 @@ gbp_policy_inline (vlib_main_t * vm,
 
                      if (action0 > 0)
                        {
-                         vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
+                         gbp_rule_t *gu;
 
-                         next0 =
-                           vnet_l2_feature_next (b0,
-                                                 gpm->l2_output_feat_next
-                                                 [is_port_based],
-                                                 (is_port_based ?
-                                                  L2OUTPUT_FEAT_GBP_POLICY_PORT
-                                                  :
-                                                  L2OUTPUT_FEAT_GBP_POLICY_MAC));
-                         ;
+                         vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
+                         gu = gbp_rule_get (gc0->gc_rules[rule_match_p0]);
+
+                         switch (gu->gu_action)
+                           {
+                           case GBP_RULE_PERMIT:
+                             next0 = vnet_l2_feature_next
+                               (b0,
+                                gpm->l2_output_feat_next
+                                [is_port_based],
+                                (is_port_based ?
+                                 L2OUTPUT_FEAT_GBP_POLICY_PORT :
+                                 L2OUTPUT_FEAT_GBP_POLICY_MAC));
+                             break;
+                           case GBP_RULE_DENY:
+                             ASSERT (0);
+                             next0 = 0;
+                             break;
+                           case GBP_RULE_REDIRECT:
+                             next0 = gbp_rule_l2_redirect (gu, b0);
+                             break;
+                           }
                        }
                    }
                }
@@ -237,8 +290,8 @@ gbp_policy_inline (vlib_main_t * vm,
                vlib_add_trace (vm, node, b0, sizeof (*t));
              t->src_epg = key0.gck_src;
              t->dst_epg = key0.gck_dst;
-             t->acl_index = value0.gc_acl_index;
-             t->allowed = (next0 != GBP_POLICY_NEXT_DENY);
+             t->acl_index = (gc0 ? gc0->gc_acl_index : ~0),
+               t->allowed = (next0 != GBP_POLICY_NEXT_DENY);
            }
 
          /* verify speculative enqueue, maybe switch current next frame */
@@ -308,15 +361,7 @@ VLIB_REGISTER_NODE (gbp_policy_mac_node) = {
   .vector_size = sizeof (u32),
   .format_trace = format_gbp_policy_trace,
   .type = VLIB_NODE_TYPE_INTERNAL,
-
-  .n_errors = ARRAY_LEN(gbp_policy_error_strings),
-  .error_strings = gbp_policy_error_strings,
-
-  .n_next_nodes = GBP_POLICY_N_NEXT,
-
-  .next_nodes = {
-    [GBP_POLICY_NEXT_DENY] = "error-drop",
-  },
+  .sibling_of = "gbp-policy-port",
 };
 
 VLIB_NODE_FUNCTION_MULTIARCH (gbp_policy_mac_node, gbp_policy_mac);
index fd9dbce..7c53d1b 100644 (file)
@@ -236,6 +236,23 @@ typedef enum
   GBP_POLICY_N_NEXT,
 } gbp_policy_next_t;
 
+/**
+ * Apply a redirect rule to a packet in one of the L3 policy nodes.
+ * Stores the rule's DPO index in the buffer for the next node.
+ *
+ * @param gu     the matched rule
+ * @param b0     the buffer being switched
+ * @param is_ip6 non-zero selects the IPv6 node and protocol, zero IPv4
+ * @return the next-node index taken from the rule's DPO
+ */
+always_inline u32
+gbp_rule_l3_redirect (const gbp_rule_t * gu, vlib_buffer_t * b0, int is_ip6)
+{
+  gbp_policy_node_t pnode;
+  const dpo_id_t *dpo;
+  dpo_proto_t dproto;
+
+  if (is_ip6)
+    {
+      pnode = GBP_POLICY_NODE_IP6;
+      dproto = DPO_PROTO_IP6;
+    }
+  else
+    {
+      pnode = GBP_POLICY_NODE_IP4;
+      dproto = DPO_PROTO_IP4;
+    }
+
+  dpo = &gu->gu_dpo[pnode][dproto];
+
+  /* The flow hash is still valid as this is a IP packet being switched */
+  vnet_buffer (b0)->ip.adj_index[VLIB_TX] = dpo->dpoi_index;
+
+  return (dpo->dpoi_next_node);
+}
+
 always_inline uword
 gbp_policy_dpo_inline (vlib_main_t * vm,
                       vlib_node_runtime_t * node,
@@ -243,6 +260,7 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
 {
   gbp_main_t *gm = &gbp_main;
   u32 n_left_from, next_index, *from, *to_next;
+  gbp_rule_t *gu;
 
   from = vlib_frame_vector_args (from_frame);
   n_left_from = from_frame->n_vectors;
@@ -260,10 +278,9 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
          const gbp_policy_dpo_t *gpd0;
          u32 bi0, next0;
          gbp_contract_key_t key0;
-         gbp_contract_value_t value0 = {
-           .as_u64 = ~0,
-         };
+         gbp_contract_t *gc0;
          vlib_buffer_t *b0;
+         index_t gci0;
 
          bi0 = from[0];
          to_next[0] = bi0;
@@ -275,6 +292,7 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
 
          b0 = vlib_get_buffer (vm, bi0);
 
+         gc0 = NULL;
          gpd0 =
            gbp_policy_dpo_get_i (vnet_buffer (b0)->ip.adj_index[VLIB_TX]);
          vnet_buffer (b0)->ip.adj_index[VLIB_TX] = gpd0->gpd_dpo.dpoi_index;
@@ -301,9 +319,9 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
                }
              else
                {
-                 value0.as_u64 = gbp_acl_lookup (&key0);
+                 gci0 = gbp_contract_find (&key0);
 
-                 if (~0 != value0.gc_lc_index)
+                 if (INDEX_INVALID != gci0)
                    {
                      fa_5tuple_opaque_t pkt_5tuple0;
                      u8 action0 = 0;
@@ -312,16 +330,17 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
                      /*
                       * tests against the ACL
                       */
+                     gc0 = gbp_contract_get (gci0);
                      acl_plugin_fill_5tuple_inline (gm->
                                                     acl_plugin.p_acl_main,
-                                                    value0.gc_lc_index, b0,
+                                                    gc0->gc_lc_index, b0,
                                                     is_ip6,
                                                     /* is_input */ 1,
                                                     /* is_l2_path */ 0,
                                                     &pkt_5tuple0);
                      acl_plugin_match_5tuple_inline (gm->
                                                      acl_plugin.p_acl_main,
-                                                     value0.gc_lc_index,
+                                                     gc0->gc_lc_index,
                                                      &pkt_5tuple0, is_ip6,
                                                      &action0, &acl_pos_p0,
                                                      &acl_match_p0,
@@ -330,8 +349,23 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
 
                      if (action0 > 0)
                        {
+
                          vnet_buffer2 (b0)->gbp.flags |= VXLAN_GBP_GPFLAGS_A;
-                         next0 = gpd0->gpd_dpo.dpoi_next_node;
+                         gu = gbp_rule_get (gc0->gc_rules[rule_match_p0]);
+
+                         switch (gu->gu_action)
+                           {
+                           case GBP_RULE_PERMIT:
+                             next0 = gpd0->gpd_dpo.dpoi_next_node;
+                             break;
+                           case GBP_RULE_DENY:
+                             ASSERT (0);
+                             next0 = 0;
+                             break;
+                           case GBP_RULE_REDIRECT:
+                             next0 = gbp_rule_l3_redirect (gu, b0, is_ip6);
+                             break;
+                           }
                        }
                    }
                }
@@ -352,7 +386,7 @@ gbp_policy_dpo_inline (vlib_main_t * vm,
              tr = vlib_add_trace (vm, node, b0, sizeof (*tr));
              tr->src_epg = key0.gck_src;
              tr->dst_epg = key0.gck_dst;
-             tr->acl_index = value0.gc_acl_index;
+             tr->acl_index = (gc0 ? gc0->gc_acl_index : ~0);
              tr->a_bit = vnet_buffer2 (b0)->gbp.flags & VXLAN_GBP_GPFLAGS_A;
            }
 
index d6a8986..8ee086d 100644 (file)
@@ -59,6 +59,10 @@ extern gbp_policy_dpo_t *gbp_policy_dpo_get (index_t index);
 
 extern dpo_type_t gbp_policy_dpo_get_type (void);
 
+extern vlib_node_registration_t ip4_gbp_policy_dpo_node;
+extern vlib_node_registration_t ip6_gbp_policy_dpo_node;
+extern vlib_node_registration_t gbp_policy_port_node;
+
 /*
  * fd.io coding-style-patch-verification: ON
  *
index 57ba408..59588ea 100644 (file)
@@ -66,11 +66,12 @@ gbp_recirc_add (u32 sw_if_index, epg_id_t epg_id, u8 is_ext)
       fib_protocol_t fproto;
       index_t ggi;
 
-      ggi = gbp_endpoint_group_find_and_lock (epg_id);
+      ggi = gbp_endpoint_group_find (epg_id);
 
       if (INDEX_INVALID == ggi)
        return (VNET_API_ERROR_NO_SUCH_ENTRY);
 
+      gbp_endpoint_group_lock (ggi);
       pool_get (gbp_recirc_pool, gr);
       clib_memset (gr, 0, sizeof (*gr));
       gri = gr - gbp_recirc_pool;
@@ -119,10 +120,12 @@ gbp_recirc_add (u32 sw_if_index, epg_id_t epg_id, u8 is_ext)
          mac_address_from_bytes (&mac,
                                  vnet_sw_interface_get_hw_address
                                  (vnet_get_main (), gr->gr_sw_if_index));
-         gbp_endpoint_update (gr->gr_sw_if_index,
-                              NULL, &mac, gr->gr_epg,
-                              GBP_ENDPOINT_FLAG_NONE,
-                              NULL, NULL, &gr->gr_ep);
+         gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
+                                       gr->gr_sw_if_index,
+                                       NULL, &mac, INDEX_INVALID,
+                                       INDEX_INVALID, gr->gr_epg,
+                                       GBP_ENDPOINT_FLAG_NONE,
+                                       NULL, NULL, &gr->gr_ep);
          vnet_feature_enable_disable ("ip4-unicast",
                                       "ip4-gbp-src-classify",
                                       gr->gr_sw_if_index, 1, 0, 0);
@@ -172,7 +175,7 @@ gbp_recirc_delete (u32 sw_if_index)
 
       if (gr->gr_is_ext)
        {
-         gbp_endpoint_delete (gr->gr_ep);
+         gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, gr->gr_ep);
          vnet_feature_enable_disable ("ip4-unicast",
                                       "ip4-gbp-src-classify",
                                       gr->gr_sw_if_index, 0, 0, 0);
index c030ee5..df3959e 100644 (file)
@@ -62,6 +62,12 @@ vlib_log_class_t grd_logger;
 #define GBP_BD_DBG(...)                           \
     vlib_log_debug (grd_logger, __VA_ARGS__);
 
+index_t
+gbp_route_domain_index (const gbp_route_domain_t * grd)
+{
+  return (grd - gbp_route_domain_pool);
+}
+
 gbp_route_domain_t *
 gbp_route_domain_get (index_t i)
 {
index f7fc4a4..ba5d5e4 100644 (file)
@@ -62,6 +62,7 @@ extern int gbp_route_domain_add_and_lock (u32 rd_id,
 extern void gbp_route_domain_unlock (index_t grdi);
 extern index_t gbp_route_domain_find_and_lock (u32 rd_id);
 extern index_t gbp_route_domain_find (u32 rd_id);
+extern index_t gbp_route_domain_index (const gbp_route_domain_t *);
 
 extern int gbp_route_domain_delete (u32 rd_id);
 extern gbp_route_domain_t *gbp_route_domain_get (index_t i);
index 7464ccf..2b264f8 100644 (file)
@@ -781,7 +781,7 @@ gbp_vxlan_tunnel_del (u32 vni)
       GBP_VXLAN_TUN_DBG ("del: %U", format_gbp_vxlan_tunnel,
                         gt - gbp_vxlan_tunnel_pool);
 
-      gbp_endpoint_flush (gt->gt_sw_if_index);
+      gbp_endpoint_flush (GBP_ENDPOINT_SRC_DP, gt->gt_sw_if_index);
       ASSERT (0 == vec_len (gt->gt_tuns));
       vec_free (gt->gt_tuns);
 
index 2fced18..3888509 100644 (file)
@@ -265,26 +265,24 @@ adj_nbr_add_or_lock_w_rewrite (fib_protocol_t nh_proto,
                               u8 *rewrite)
 {
     adj_index_t adj_index;
-    ip_adjacency_t *adj;
 
     adj_index = adj_nbr_find(nh_proto, link_type, nh_addr, sw_if_index);
 
     if (ADJ_INDEX_INVALID == adj_index)
     {
-       adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
+        ip_adjacency_t *adj;
+
+        adj = adj_nbr_alloc(nh_proto, link_type, nh_addr, sw_if_index);
        adj->rewrite_header.sw_if_index = sw_if_index;
-    }
-    else
-    {
-        adj = adj_get(adj_index);
+        adj_index = adj_get_index(adj);
     }
 
-    adj_lock(adj_get_index(adj));
-    adj_nbr_update_rewrite(adj_get_index(adj),
+    adj_lock(adj_index);
+    adj_nbr_update_rewrite(adj_index,
                           ADJ_NBR_REWRITE_FLAG_COMPLETE,
                           rewrite);
 
-    return (adj_get_index(adj));
+    return (adj_index);
 }
 
 /**
index 92480ce..2cceba6 100644 (file)
@@ -365,23 +365,62 @@ class VppGbpRouteDomain(VppObject):
         return False
 
 
+class VppGbpContractNextHop():
+    def __init__(self, mac, bd, ip, rd):
+        self.mac = mac
+        self.ip = ip
+        self.bd = bd
+        self.rd = rd
+
+    def encode(self):
+        return {'ip': self.ip.encode(),
+                'mac': self.mac.encode(),
+                'bd_id': self.bd.bd.bd_id,
+                'rd_id': self.rd.rd_id}
+
+
+class VppGbpContractRule():
+    def __init__(self, action, nhs=[]):
+        self.action = action
+        self.nhs = nhs
+        e = VppEnum.vl_api_gbp_hash_mode_t
+        self.hash_mode = e.GBP_API_HASH_MODE_SRC_IP
+
+    def encode(self):
+        nhs = []
+        for nh in self.nhs:
+            nhs.append(nh.encode())
+        while len(nhs) < 8:
+            nhs.append({})
+        return {'action': self.action,
+                'nh_set': {
+                    'hash_mode': self.hash_mode,
+                    'n_nhs': len(self.nhs),
+                    'nhs': nhs}}
+
+
 class VppGbpContract(VppObject):
     """
     GBP Contract
     """
 
-    def __init__(self, test, src_epg, dst_epg, acl_index):
+    def __init__(self, test, src_epg, dst_epg, acl_index, rules=[]):
         self._test = test
         self.acl_index = acl_index
         self.src_epg = src_epg
         self.dst_epg = dst_epg
+        self.rules = rules
 
     def add_vpp_config(self):
+        rules = []
+        for r in self.rules:
+            rules.append(r.encode())
         self._test.vapi.gbp_contract_add_del(
             1,
             self.src_epg,
             self.dst_epg,
-            self.acl_index)
+            self.acl_index,
+            rules)
         self._test.registry.register(self, self._test.logger)
 
     def remove_vpp_config(self):
@@ -389,7 +428,8 @@ class VppGbpContract(VppObject):
             0,
             self.src_epg,
             self.dst_epg,
-            self.acl_index)
+            self.acl_index,
+            [])
 
     def __str__(self):
         return self.object_id()
@@ -1059,7 +1099,14 @@ class TestGBP(VppTestCase):
         rule = acl.create_rule(permit_deny=1, proto=17)
         rule2 = acl.create_rule(is_ipv6=1, permit_deny=1, proto=17)
         acl_index = acl.add_vpp_config([rule, rule2])
-        c1 = VppGbpContract(self, 220, 221, acl_index)
+        c1 = VppGbpContract(
+            self, 220, 221, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
         c1.add_vpp_config()
 
         self.send_and_expect_bridged(eps[0].itf,
@@ -1071,7 +1118,15 @@ class TestGBP(VppTestCase):
         #
         # contract for the return direction
         #
-        c2 = VppGbpContract(self, 221, 220, acl_index)
+        c2 = VppGbpContract(
+            self, 221, 220, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
+
         c2.add_vpp_config()
 
         self.send_and_expect_bridged(eps[0].itf,
@@ -1091,7 +1146,15 @@ class TestGBP(VppTestCase):
         #
         # A uni-directional contract from EPG 220 -> 222 'L3 routed'
         #
-        c3 = VppGbpContract(self, 220, 222, acl_index)
+        c3 = VppGbpContract(
+            self, 220, 222, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
+
         c3.add_vpp_config()
 
         self.logger.info(self.vapi.cli("sh gbp contract"))
@@ -1188,7 +1251,15 @@ class TestGBP(VppTestCase):
                                  dport_from=1234, dport_to=1234)
 
         acl_index2 = acl2.add_vpp_config([rule, rule2])
-        c4 = VppGbpContract(self, 220, 333, acl_index2)
+        c4 = VppGbpContract(
+            self, 220, 333, acl_index2,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
+
         c4.add_vpp_config()
 
         self.send_and_expect_natted(eps[0].itf,
@@ -1221,7 +1292,15 @@ class TestGBP(VppTestCase):
         self.send_and_assert_no_replies(self.pg7,
                                         pkt_inter_epg_220_from_global * 65)
 
-        c5 = VppGbpContract(self, 333, 220, acl_index2)
+        c5 = VppGbpContract(
+            self, 333, 220, acl_index2,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
+
         c5.add_vpp_config()
 
         self.send_and_expect_unnatted(self.pg7,
@@ -1580,16 +1659,14 @@ class TestGBP(VppTestCase):
 
         #
         # static EP cannot reach the learnt EPs since there is no contract
+        # only test 1 EP as the others could timeout
         #
-        self.logger.info(self.vapi.cli("show gbp endpoint"))
-        self.logger.info(self.vapi.cli("show l2fib all"))
-        for l in learnt:
-            p = (Ether(src=ep.mac, dst=l['mac']) /
-                 IP(dst=l['ip'], src=ep.ip4.address) /
-                 UDP(sport=1234, dport=1234) /
-                 Raw('\xa5' * 100))
+        p = (Ether(src=ep.mac, dst=l['mac']) /
+             IP(dst=learnt[0]['ip'], src=ep.ip4.address) /
+             UDP(sport=1234, dport=1234) /
+             Raw('\xa5' * 100))
 
-            self.send_and_assert_no_replies(self.pg0, [p], timeout=0.2)
+        self.send_and_assert_no_replies(self.pg0, [p])
 
         #
         # refresh the entries after the check for no replies above
@@ -1620,7 +1697,15 @@ class TestGBP(VppTestCase):
         rule = acl.create_rule(permit_deny=1, proto=17)
         rule2 = acl.create_rule(is_ipv6=1, permit_deny=1, proto=17)
         acl_index = acl.add_vpp_config([rule, rule2])
-        c1 = VppGbpContract(self, 220, 330, acl_index)
+        c1 = VppGbpContract(
+            self, 220, 330, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
+
         c1.add_vpp_config()
 
         for l in learnt:
@@ -1874,6 +1959,8 @@ class TestGBP(VppTestCase):
     def test_gbp_learn_l3(self):
         """ GBP L3 Endpoint Learning """
 
+        self.vapi.cli("set logging class gbp debug")
+
         routed_dst_mac = "00:0c:0c:0c:0c:0c"
         routed_src_mac = "00:22:bd:f8:19:ff"
 
@@ -1942,7 +2029,6 @@ class TestGBP(VppTestCase):
         self.logger.info(self.vapi.cli("sh bridge 1 detail"))
         self.logger.info(self.vapi.cli("sh gbp bridge"))
         self.logger.info(self.vapi.cli("sh gbp route"))
-        self.logger.info(self.vapi.cli("show l2fib all"))
 
         # ... and has a /32 and /128 applied
         ip4_addr = VppIpInterfaceAddress(self, gbd1.bvi, "10.0.0.128", 32)
@@ -2257,27 +2343,57 @@ class TestGBP(VppTestCase):
                 self.assertEqual(inner[IP].dst, ip)
 
         #
-        # remove the API remote EPs, they are now UU-fwd
+        # remove the API remote EPs, only API sourced is gone, the DP
+        # learnt one remains
         #
         rep_88.remove_vpp_config()
         rep_2.remove_vpp_config()
 
         self.logger.info(self.vapi.cli("show gbp endpoint"))
 
-        for ip in ips:
-            self.assertFalse(find_gbp_endpoint(self, ip=ip))
+        self.assertFalse(find_gbp_endpoint(self, ip=rep_88.ip4.address))
 
-            p = (Ether(src=ep.mac, dst=self.loop0.local_mac) /
-                 IP(dst=ip, src=ep.ip4.address) /
-                 UDP(sport=1234, dport=1234) /
-                 Raw('\xa5' * 100))
+        p = (Ether(src=ep.mac, dst=self.loop0.local_mac) /
+             IP(src=ep.ip4.address, dst=rep_88.ip4.address) /
+             UDP(sport=1234, dport=1234) /
+             Raw('\xa5' * 100))
+        rxs = self.send_and_expect(self.pg0, [p], self.pg4)
+
+        self.assertTrue(find_gbp_endpoint(self, ip=rep_2.ip4.address))
 
-            rxs = self.send_and_expect(self.pg0, [p], self.pg4)
+        p = (Ether(src=ep.mac, dst=self.loop0.local_mac) /
+             IP(src=ep.ip4.address, dst=rep_2.ip4.address) /
+             UDP(sport=1234, dport=1234) /
+             Raw('\xa5' * 100))
+        rxs = self.send_and_expect(self.pg0, [p], self.pg2)
+
+        #
+        # to appease the testcase we cannot have the registered EP still
+        # present (because it's DP learnt) when the TC ends so wait until
+        # it is removed
+        #
+        self.sleep(2)
 
         #
         # shutdown with learnt endpoint present
         #
-        self.logger.info(self.vapi.cli("show gbp endpoint-group"))
+        p = (Ether(src=self.pg2.remote_mac,
+                   dst=self.pg2.local_mac) /
+             IP(src=self.pg2.remote_hosts[1].ip4,
+                dst=self.pg2.local_ip4) /
+             UDP(sport=1234, dport=48879) /
+             VXLAN(vni=101, gpid=220, flags=0x88) /
+             Ether(src=l['mac'], dst="00:00:00:11:11:11") /
+             IP(src=learnt[1]['ip'], dst=ep.ip4.address) /
+             UDP(sport=1234, dport=1234) /
+             Raw('\xa5' * 100))
+
+        rx = self.send_and_expect(self.pg2, [p], self.pg0)
+
+        # endpoint learnt via the parent GBP-vxlan interface
+        self.assertTrue(find_gbp_endpoint(self,
+                                          vx_tun_l3._sw_if_index,
+                                          ip=l['ip']))
 
         #
         # TODO
@@ -2287,6 +2403,464 @@ class TestGBP(VppTestCase):
         self.pg3.unconfig_ip4()
         self.pg4.unconfig_ip4()
 
+    def test_gbp_redirect(self):
+        """ GBP Endpoint Redirect """
+
+        self.vapi.cli("set logging class gbp debug")
+
+        routed_dst_mac = "00:0c:0c:0c:0c:0c"
+        routed_src_mac = "00:22:bd:f8:19:ff"
+
+        learnt = [{'mac': '00:00:11:11:11:02',
+                   'ip': '10.0.1.2',
+                   'ip6': '2001:10::2'},
+                  {'mac': '00:00:11:11:11:03',
+                   'ip': '10.0.1.3',
+                   'ip6': '2001:10::3'}]
+
+        #
+        # lower the inactive threshold so these tests pass in a
+        # reasonable amount of time
+        #
+        self.vapi.gbp_endpoint_learn_set_inactive_threshold(1)
+
+        #
+        # IP tables
+        #
+        t4 = VppIpTable(self, 1)
+        t4.add_vpp_config()
+        t6 = VppIpTable(self, 1, True)
+        t6.add_vpp_config()
+
+        rd1 = VppGbpRouteDomain(self, 2, t4, t6)
+        rd1.add_vpp_config()
+
+        self.loop0.set_mac(self.router_mac.address)
+
+        #
+        # Bind the BVI to the RD
+        #
+        VppIpInterfaceBind(self, self.loop0, t4).add_vpp_config()
+        VppIpInterfaceBind(self, self.loop0, t6).add_vpp_config()
+
+        #
+        # Pg7 hosts a BD's UU-fwd
+        #
+        self.pg7.config_ip4()
+        self.pg7.resolve_arp()
+
+        #
+        # GBP bridge domains for the EPs
+        #
+        bd1 = VppBridgeDomain(self, 1)
+        bd1.add_vpp_config()
+        gbd1 = VppGbpBridgeDomain(self, bd1, self.loop0)
+        gbd1.add_vpp_config()
+
+        bd2 = VppBridgeDomain(self, 2)
+        bd2.add_vpp_config()
+        gbd2 = VppGbpBridgeDomain(self, bd2, self.loop1)
+        gbd2.add_vpp_config()
+
+        # ... and has a /32 and /128 applied
+        ip4_addr = VppIpInterfaceAddress(self, gbd1.bvi, "10.0.0.128", 32)
+        ip4_addr.add_vpp_config()
+        ip6_addr = VppIpInterfaceAddress(self, gbd1.bvi, "2001:10::128", 128)
+        ip6_addr.add_vpp_config()
+        ip4_addr = VppIpInterfaceAddress(self, gbd2.bvi, "10.0.1.128", 32)
+        ip4_addr.add_vpp_config()
+        ip6_addr = VppIpInterfaceAddress(self, gbd2.bvi, "2001:11::128", 128)
+        ip6_addr.add_vpp_config()
+
+        #
+        # The Endpoint-groups in which we are learning endpoints
+        #
+        epg_220 = VppGbpEndpointGroup(self, 220, rd1, gbd1,
+                                      None, gbd1.bvi,
+                                      "10.0.0.128",
+                                      "2001:10::128")
+        epg_220.add_vpp_config()
+        epg_221 = VppGbpEndpointGroup(self, 221, rd1, gbd2,
+                                      None, gbd2.bvi,
+                                      "10.0.1.128",
+                                      "2001:11::128")
+        epg_221.add_vpp_config()
+        epg_222 = VppGbpEndpointGroup(self, 222, rd1, gbd1,
+                                      None, gbd1.bvi,
+                                      "10.0.2.128",
+                                      "2001:12::128")
+        epg_222.add_vpp_config()
+
+        #
+        # GBP bridge domains for the SEPs
+        #
+        bd_uu1 = VppVxlanGbpTunnel(self, self.pg7.local_ip4,
+                                   self.pg7.remote_ip4, 116)
+        bd_uu1.add_vpp_config()
+        bd_uu2 = VppVxlanGbpTunnel(self, self.pg7.local_ip4,
+                                   self.pg7.remote_ip4, 117)
+        bd_uu2.add_vpp_config()
+
+        bd3 = VppBridgeDomain(self, 3)
+        bd3.add_vpp_config()
+        gbd3 = VppGbpBridgeDomain(self, bd3, self.loop2, bd_uu1, learn=False)
+        gbd3.add_vpp_config()
+        bd4 = VppBridgeDomain(self, 4)
+        bd4.add_vpp_config()
+        gbd4 = VppGbpBridgeDomain(self, bd4, self.loop3, bd_uu2, learn=False)
+        gbd4.add_vpp_config()
+
+        #
+        # EPGs in which the service endpoints exist
+        #
+        epg_320 = VppGbpEndpointGroup(self, 320, rd1, gbd3,
+                                      None, gbd1.bvi,
+                                      "12.0.0.128",
+                                      "4001:10::128")
+        epg_320.add_vpp_config()
+        epg_321 = VppGbpEndpointGroup(self, 321, rd1, gbd4,
+                                      None, gbd2.bvi,
+                                      "12.0.1.128",
+                                      "4001:11::128")
+        epg_321.add_vpp_config()
+
+        #
+        # three local endpoints
+        #
+        ep1 = VppGbpEndpoint(self, self.pg0,
+                             epg_220, None,
+                             "10.0.0.1", "11.0.0.1",
+                             "2001:10::1", "3001:10::1")
+        ep1.add_vpp_config()
+        ep2 = VppGbpEndpoint(self, self.pg1,
+                             epg_221, None,
+                             "10.0.1.1", "11.0.1.1",
+                             "2001:11::1", "3001:11::1")
+        ep2.add_vpp_config()
+        ep3 = VppGbpEndpoint(self, self.pg2,
+                             epg_222, None,
+                             "10.0.2.2", "11.0.2.2",
+                             "2001:12::1", "3001:12::1")
+        ep3.add_vpp_config()
+
+        #
+        # service endpoints
+        #
+        sep1 = VppGbpEndpoint(self, self.pg3,
+                              epg_320, None,
+                              "12.0.0.1", "13.0.0.1",
+                              "4001:10::1", "5001:10::1")
+        sep1.add_vpp_config()
+        sep2 = VppGbpEndpoint(self, self.pg4,
+                              epg_320, None,
+                              "12.0.0.2", "13.0.0.2",
+                              "4001:10::2", "5001:10::2")
+        sep2.add_vpp_config()
+        sep3 = VppGbpEndpoint(self, self.pg5,
+                              epg_321, None,
+                              "12.0.1.1", "13.0.1.1",
+                              "4001:11::1", "5001:11::1")
+        sep3.add_vpp_config()
+        # this EP is not installed immediately
+        sep4 = VppGbpEndpoint(self, self.pg6,
+                              epg_321, None,
+                              "12.0.1.2", "13.0.1.2",
+                              "4001:11::2", "5001:11::2")
+
+        #
+        # an L2 switch packet between local EPs in different EPGs
+        #  different dest ports on each so they are LB hashed differently
+        #
+        p4 = [(Ether(src=ep1.mac, dst=ep3.mac) /
+               IP(src=ep1.ip4.address, dst=ep3.ip4.address) /
+               UDP(sport=1234, dport=1234) /
+               Raw('\xa5' * 100)),
+              (Ether(src=ep1.mac, dst=ep3.mac) /
+               IP(src=ep1.ip4.address, dst=ep3.ip4.address) /
+               UDP(sport=1234, dport=1235) /
+               Raw('\xa5' * 100))]
+        p6 = [(Ether(src=ep1.mac, dst=ep3.mac) /
+               IPv6(src=ep1.ip6.address, dst=ep3.ip6.address) /
+               UDP(sport=1234, dport=1234) /
+               Raw('\xa5' * 100)),
+              (Ether(src=ep1.mac, dst=ep3.mac) /
+               IPv6(src=ep1.ip6.address, dst=ep3.ip6.address) /
+               UDP(sport=1234, dport=1230) /
+               Raw('\xa5' * 100))]
+
+        # should be dropped since no contract yet
+        self.send_and_assert_no_replies(self.pg0, [p4[0]])
+        self.send_and_assert_no_replies(self.pg0, [p6[0]])
+
+        #
+        # Add a contract with a rule to load-balance redirect via SEP1 and SEP2
+        # one of the next-hops is via an EP that is not known
+        #
+        acl = VppGbpAcl(self)
+        rule4 = acl.create_rule(permit_deny=1, proto=17)
+        rule6 = acl.create_rule(is_ipv6=1, permit_deny=1, proto=17)
+        acl_index = acl.add_vpp_config([rule4, rule6])
+
+        c1 = VppGbpContract(
+            self, 220, 222, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_REDIRECT,
+                [VppGbpContractNextHop(sep1.vmac, sep1.epg.bd,
+                                       sep1.ip4, sep1.epg.rd),
+                 VppGbpContractNextHop(sep2.vmac, sep2.epg.bd,
+                                       sep2.ip4, sep2.epg.rd)]),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_REDIRECT,
+                 [VppGbpContractNextHop(sep3.vmac, sep3.epg.bd,
+                                        sep3.ip6, sep3.epg.rd),
+                  VppGbpContractNextHop(sep4.vmac, sep4.epg.bd,
+                                        sep4.ip6, sep4.epg.rd)])])
+        c1.add_vpp_config()
+
+        #
+        # send again with the contract present, now packets arrive
+        # at SEP1 or SEP2 depending on the hashing
+        #
+        rxs = self.send_and_expect(self.pg0, p4[0] * 17, sep2.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep2.mac)
+            self.assertEqual(rx[IP].src, ep1.ip4.address)
+            self.assertEqual(rx[IP].dst, ep3.ip4.address)
+
+        rxs = self.send_and_expect(self.pg0, p4[1] * 17, sep1.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep1.mac)
+            self.assertEqual(rx[IP].src, ep1.ip4.address)
+            self.assertEqual(rx[IP].dst, ep3.ip4.address)
+
+        rxs = self.send_and_expect(self.pg0, p6[0] * 17, self.pg7)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg7.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg7.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg7.local_ip4)
+            self.assertEqual(rx[IP].dst, self.pg7.remote_ip4)
+            self.assertEqual(rx[VXLAN].vni, 117)
+            self.assertTrue(rx[VXLAN].flags.G)
+            self.assertTrue(rx[VXLAN].flags.Instance)
+            # redirect policy has been applied
+            self.assertTrue(rx[VXLAN].gpflags.A)
+            self.assertFalse(rx[VXLAN].gpflags.D)
+
+            inner = rx[VXLAN].payload
+
+            self.assertEqual(inner[Ether].src, routed_src_mac)
+            self.assertEqual(inner[Ether].dst, sep4.mac)
+            self.assertEqual(inner[IPv6].src, ep1.ip6.address)
+            self.assertEqual(inner[IPv6].dst, ep3.ip6.address)
+
+        rxs = self.send_and_expect(self.pg0, p6[1] * 17, sep3.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep3.mac)
+            self.assertEqual(rx[IPv6].src, ep1.ip6.address)
+            self.assertEqual(rx[IPv6].dst, ep3.ip6.address)
+
+        #
+        # programme the unknown EP
+        #
+        sep4.add_vpp_config()
+
+        rxs = self.send_and_expect(self.pg0, p6[0] * 17, sep4.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep4.mac)
+            self.assertEqual(rx[IPv6].src, ep1.ip6.address)
+            self.assertEqual(rx[IPv6].dst, ep3.ip6.address)
+
+        #
+        # and revert back to unprogrammed
+        #
+        sep4.remove_vpp_config()
+
+        rxs = self.send_and_expect(self.pg0, p6[0] * 17, self.pg7)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, self.pg7.local_mac)
+            self.assertEqual(rx[Ether].dst, self.pg7.remote_mac)
+            self.assertEqual(rx[IP].src, self.pg7.local_ip4)
+            self.assertEqual(rx[IP].dst, self.pg7.remote_ip4)
+            self.assertEqual(rx[VXLAN].vni, 117)
+            self.assertTrue(rx[VXLAN].flags.G)
+            self.assertTrue(rx[VXLAN].flags.Instance)
+            # redirect policy has been applied
+            self.assertTrue(rx[VXLAN].gpflags.A)
+            self.assertFalse(rx[VXLAN].gpflags.D)
+
+            inner = rx[VXLAN].payload
+
+            self.assertEqual(inner[Ether].src, routed_src_mac)
+            self.assertEqual(inner[Ether].dst, sep4.mac)
+            self.assertEqual(inner[IPv6].src, ep1.ip6.address)
+            self.assertEqual(inner[IPv6].dst, ep3.ip6.address)
+
+        #
+        # programme the unknown EP for the L3 tests
+        #
+        sep4.add_vpp_config()
+
+        #
+        # an L3 switch packet between local EPs in different EPGs
+        #  different dest ports on each so they are LB hashed differently
+        #
+        p4 = [(Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IP(src=ep1.ip4.address, dst=ep2.ip4.address) /
+               UDP(sport=1234, dport=1234) /
+               Raw('\xa5' * 100)),
+              (Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IP(src=ep1.ip4.address, dst=ep2.ip4.address) /
+               UDP(sport=1234, dport=1235) /
+               Raw('\xa5' * 100))]
+        p6 = [(Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IPv6(src=ep1.ip6.address, dst=ep2.ip6.address) /
+               UDP(sport=1234, dport=1234) /
+               Raw('\xa5' * 100)),
+              (Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IPv6(src=ep1.ip6.address, dst=ep2.ip6.address) /
+               UDP(sport=1234, dport=1230) /
+               Raw('\xa5' * 100))]
+
+        c2 = VppGbpContract(
+            self, 220, 221, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_REDIRECT,
+                [VppGbpContractNextHop(sep1.vmac, sep1.epg.bd,
+                                       sep1.ip4, sep1.epg.rd),
+                 VppGbpContractNextHop(sep2.vmac, sep2.epg.bd,
+                                       sep2.ip4, sep2.epg.rd)]),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_REDIRECT,
+                 [VppGbpContractNextHop(sep3.vmac, sep3.epg.bd,
+                                        sep3.ip6, sep3.epg.rd),
+                  VppGbpContractNextHop(sep4.vmac, sep4.epg.bd,
+                                        sep4.ip6, sep4.epg.rd)])])
+        c2.add_vpp_config()
+
+        rxs = self.send_and_expect(self.pg0, p4[0] * 17, sep2.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep2.mac)
+            self.assertEqual(rx[IP].src, ep1.ip4.address)
+            self.assertEqual(rx[IP].dst, ep2.ip4.address)
+
+        #
+        # learn a remote EP in EPG 221
+        #
+        vx_tun_l3 = VppGbpVxlanTunnel(
+            self, 444, rd1.rd_id,
+            VppEnum.vl_api_gbp_vxlan_tunnel_mode_t.GBP_VXLAN_TUNNEL_MODE_L3)
+        vx_tun_l3.add_vpp_config()
+
+        c3 = VppGbpContract(
+            self, 221, 220, acl_index,
+            [VppGbpContractRule(
+                VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                []),
+             VppGbpContractRule(
+                 VppEnum.vl_api_gbp_rule_action_t.GBP_API_RULE_PERMIT,
+                 [])])
+        c3.add_vpp_config()
+
+        p = (Ether(src=self.pg7.remote_mac,
+                   dst=self.pg7.local_mac) /
+             IP(src=self.pg7.remote_ip4,
+                dst=self.pg7.local_ip4) /
+             UDP(sport=1234, dport=48879) /
+             VXLAN(vni=444, gpid=221, flags=0x88) /
+             Ether(src="00:22:22:22:22:33", dst=self.router_mac.address) /
+             IP(src="10.0.0.88", dst=ep1.ip4.address) /
+             UDP(sport=1234, dport=1234) /
+             Raw('\xa5' * 100))
+
+        rx = self.send_and_expect(self.pg7, [p], self.pg0)
+
+        # endpoint learnt via the parent GBP-vxlan interface
+        self.assertTrue(find_gbp_endpoint(self,
+                                          vx_tun_l3._sw_if_index,
+                                          ip="10.0.0.88"))
+
+        p = (Ether(src=self.pg7.remote_mac,
+                   dst=self.pg7.local_mac) /
+             IP(src=self.pg7.remote_ip4,
+                dst=self.pg7.local_ip4) /
+             UDP(sport=1234, dport=48879) /
+             VXLAN(vni=444, gpid=221, flags=0x88) /
+             Ether(src="00:22:22:22:22:33", dst=self.router_mac.address) /
+             IPv6(src="2001:10::88", dst=ep1.ip6.address) /
+             UDP(sport=1234, dport=1234) /
+             Raw('\xa5' * 100))
+
+        rx = self.send_and_expect(self.pg7, [p], self.pg0)
+
+        # endpoint learnt via the parent GBP-vxlan interface
+        self.assertTrue(find_gbp_endpoint(self,
+                                          vx_tun_l3._sw_if_index,
+                                          ip="2001:10::88"))
+
+        #
+        # L3 switch from local to remote EP
+        #
+        p4 = [(Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IP(src=ep1.ip4.address, dst="10.0.0.88") /
+               UDP(sport=1234, dport=1234) /
+               Raw('\xa5' * 100)),
+              (Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IP(src=ep1.ip4.address, dst="10.0.0.88") /
+               UDP(sport=1234, dport=1235) /
+               Raw('\xa5' * 100))]
+        p6 = [(Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IPv6(src=ep1.ip6.address, dst="2001:10::88") /
+               UDP(sport=1234, dport=1234) /
+               Raw('\xa5' * 100)),
+              (Ether(src=ep1.mac, dst=self.router_mac.address) /
+               IPv6(src=ep1.ip6.address, dst="2001:10::88") /
+               UDP(sport=1234, dport=123) /
+               Raw('\xa5' * 100))]
+
+        rxs = self.send_and_expect(self.pg0, p4[0] * 17, sep2.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep2.mac)
+            self.assertEqual(rx[IP].src, ep1.ip4.address)
+            self.assertEqual(rx[IP].dst, "10.0.0.88")
+
+        rxs = self.send_and_expect(self.pg0, p4[1] * 17, sep1.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep1.mac)
+            self.assertEqual(rx[IP].src, ep1.ip4.address)
+            self.assertEqual(rx[IP].dst, "10.0.0.88")
+
+        rxs = self.send_and_expect(self.pg0, p6[0] * 17, sep4.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep4.mac)
+            self.assertEqual(rx[IPv6].src, ep1.ip6.address)
+            self.assertEqual(rx[IPv6].dst, "2001:10::88")
+
+        rxs = self.send_and_expect(self.pg0, p6[1] * 17, sep3.itf)
+
+        for rx in rxs:
+            self.assertEqual(rx[Ether].src, routed_src_mac)
+            self.assertEqual(rx[Ether].dst, sep3.mac)
+            self.assertEqual(rx[IPv6].src, ep1.ip6.address)
+            self.assertEqual(rx[IPv6].dst, "2001:10::88")
+
 
 if __name__ == '__main__':
     unittest.main(testRunner=VppTestRunner)
index 1fcc4ce..8ed870e 100644 (file)
@@ -3631,14 +3631,16 @@ class VppPapiProvider(object):
         """ GBP Subnet Dump """
         return self.api(self.papi.gbp_subnet_dump, {})
 
-    def gbp_contract_add_del(self, is_add, src_epg, dst_epg, acl_index):
+    def gbp_contract_add_del(self, is_add, src_epg, dst_epg, acl_index, rules):
         """ GBP contract Add/Del """
         return self.api(self.papi.gbp_contract_add_del,
                         {'is_add': is_add,
                          'contract': {
                              'acl_index': acl_index,
                              'src_epg': src_epg,
-                             'dst_epg': dst_epg}})
+                             'dst_epg': dst_epg,
+                             'n_rules': len(rules),
+                             'rules': rules}})
 
     def gbp_contract_dump(self):
         """ GBP contract Dump """