2 * gbp.h : Group Based Policy
4 * Copyright (c) 2018 Cisco and/or its affiliates.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include <plugins/gbp/gbp_endpoint.h>
19 #include <plugins/gbp/gbp_endpoint_group.h>
20 #include <plugins/gbp/gbp_itf.h>
21 #include <plugins/gbp/gbp_scanner.h>
22 #include <plugins/gbp/gbp_bridge_domain.h>
23 #include <plugins/gbp/gbp_route_domain.h>
24 #include <plugins/gbp/gbp_policy_dpo.h>
25 #include <plugins/gbp/gbp_vxlan.h>
27 #include <vnet/l2/l2_input.h>
28 #include <vnet/l2/l2_output.h>
29 #include <vnet/l2/feat_bitmap.h>
30 #include <vnet/l2/l2_fib.h>
31 #include <vnet/fib/fib_table.h>
32 #include <vnet/ip-neighbor/ip_neighbor.h>
33 #include <vnet/ip-neighbor/ip4_neighbor.h>
34 #include <vnet/ip-neighbor/ip6_neighbor.h>
35 #include <vnet/fib/fib_walk.h>
36 #include <vnet/vxlan-gbp/vxlan_gbp.h>
/* Printable name of each endpoint attribute, indexed by attribute value */
38 static const char *gbp_endpoint_attr_names[] = GBP_ENDPOINT_ATTR_NAMES;
43 gbp_ep_db_t gbp_ep_db;
/* FIB sources, FIB node type and log class; all set in gbp_endpoint_init() */
45 static fib_source_t gbp_fib_source_hi;
46 static fib_source_t gbp_fib_source_low;
47 static fib_node_type_t gbp_endpoint_fib_type;
48 static vlib_log_class_t gbp_ep_logger;
/*
 * Logging helpers bound to the GBP endpoint log class. They expand to a
 * bare expression (no trailing ';') so that a call written as
 * GBP_ENDPOINT_INFO (...); is exactly one statement and remains safe
 * inside an unbraced if/else.
 */
50 #define GBP_ENDPOINT_DBG(...) \
51 vlib_log_debug (gbp_ep_logger, __VA_ARGS__)
53 #define GBP_ENDPOINT_INFO(...) \
54 vlib_log_notice (gbp_ep_logger, __VA_ARGS__)
57 * Pool of GBP endpoints
59 gbp_endpoint_t *gbp_endpoint_pool;
62 * A count of the number of dynamic entries
64 static u32 gbp_n_learnt_endpoints;
/* Loop header: _item starts at GBP_ENDPOINT_ATTR_FIRST and the loop runs
   while it is below GBP_ENDPOINT_ATTR_LAST */
66 #define FOR_EACH_GBP_ENDPOINT_ATTR(_item) \
67 for (_item = GBP_ENDPOINT_ATTR_FIRST; \
68 _item < GBP_ENDPOINT_ATTR_LAST; \
/*
 * format() helper taking a gbp_endpoint_flags_t. Appends to s the name of
 * every attribute whose bit (1 << attr) is set in the flags, each name
 * followed by a comma.
 */
72 format_gbp_endpoint_flags (u8 * s, va_list * args)
74 gbp_endpoint_attr_t attr;
75 gbp_endpoint_flags_t flags = va_arg (*args, gbp_endpoint_flags_t);
77 FOR_EACH_GBP_ENDPOINT_ATTR (attr)
79 if ((1 << attr) & flags)
81 s = format (s, "%s,", gbp_endpoint_attr_names[attr]);
/* 1 when the endpoint's forwarding flags have the REMOTE bit set, else 0 */
89 gbp_endpoint_is_remote (const gbp_endpoint_t * ge)
91 return (! !(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE));
/* Logical inverse of gbp_endpoint_is_remote(): 1 when REMOTE is clear */
95 gbp_endpoint_is_local (const gbp_endpoint_t * ge)
97 return (!(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_REMOTE));
/* 1 when the endpoint's forwarding flags have the EXTERNAL bit set, else 0 */
101 gbp_endpoint_is_external (const gbp_endpoint_t * ge)
103 return (! !(ge->ge_fwd.gef_flags & GBP_ENDPOINT_FLAG_EXTERNAL));
/*
 * An endpoint counts as learnt when its first location comes from the
 * data-plane source. An endpoint with no locations is tested for first.
 * NOTE(review): the value returned in that case is on a line not visible
 * in this view.
 */
107 gbp_endpoint_is_learnt (const gbp_endpoint_t * ge)
109 if (0 == vec_len (ge->ge_locs))
112 /* DP is the highest source so if present it will be first */
113 return (ge->ge_locs[0].gel_src == GBP_ENDPOINT_SRC_DP);
/*
 * Unpack a 16_8 hash key: key[0] is converted back to a MAC address and
 * key[1] is returned through sw_if_index.
 * NOTE(review): the out-parameter is named sw_if_index, but
 * gbp_endpoint_add_mac() builds these keys from a bd_index - confirm
 * which index key[1] really holds.
 */
117 gbp_endpoint_extract_key_mac_itf (const clib_bihash_kv_16_8_t * key,
118 mac_address_t * mac, u32 * sw_if_index)
120 mac_address_from_u64 (mac, key->key[0]);
121 *sw_if_index = key->key[1];
/*
 * Unpack a 24_8 hash key: key[0] and key[1] are the two 64-bit halves of
 * the IP address, key[2] is returned through sw_if_index.
 * NOTE(review): the out-parameter is named sw_if_index, but
 * gbp_endpoint_add_ip() builds these keys from a fib_index - confirm
 * which index key[2] really holds.
 */
125 gbp_endpoint_extract_key_ip_itf (const clib_bihash_kv_24_8_t * key,
126 ip46_address_t * ip, u32 * sw_if_index)
128 ip->as_u64[0] = key->key[0];
129 ip->as_u64[1] = key->key[1];
130 *sw_if_index = key->key[2];
/*
 * Look up an endpoint by IP address within a FIB table. The search key is
 * built from (ip, fib_index); the value found in the IP/RD hash is used
 * as the endpoint's pool index.
 * NOTE(review): the handling of a failed search (rv) is on lines not
 * visible in this view.
 */
134 gbp_endpoint_find_ip (const ip46_address_t * ip, u32 fib_index)
136 clib_bihash_kv_24_8_t key, value;
139 gbp_endpoint_mk_key_ip (ip, fib_index, &key);
141 rv = clib_bihash_search_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, &value);
146 return (gbp_endpoint_get (value.value));
/*
 * Record gei as the endpoint reachable via sw_if_index. The per-interface
 * vector is grown on demand, with new slots initialised to ~0.
 */
150 gbp_endpoint_add_itf (u32 sw_if_index, index_t gei)
152 vec_validate_init_empty (gbp_ep_db.ged_by_sw_if_index, sw_if_index, ~0);
154 gbp_ep_db.ged_by_sw_if_index[sw_if_index] = gei;
/*
 * Add an entry keyed on (mac, bd_index) to the MAC/BD hash; the final
 * argument 1 to clib_bihash_add_del_16_8 selects "add".
 * NOTE(review): gei is presumably stored as the entry's value on a line
 * not visible here - confirm.
 */
158 gbp_endpoint_add_mac (const mac_address_t * mac, u32 bd_index, index_t gei)
160 clib_bihash_kv_16_8_t key;
163 gbp_endpoint_mk_key_mac (mac->bytes, bd_index, &key);
166 rv = clib_bihash_add_del_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, 1);
/*
 * Add an entry keyed on (ip, fib_index) to the IP/RD hash; the final
 * argument 1 to clib_bihash_add_del_24_8 selects "add".
 * NOTE(review): gei is presumably stored as the entry's value on a line
 * not visible here - confirm.
 */
173 gbp_endpoint_add_ip (const ip46_address_t * ip, u32 fib_index, index_t gei)
175 clib_bihash_kv_24_8_t key;
178 gbp_endpoint_mk_key_ip (ip, fib_index, &key);
181 rv = clib_bihash_add_del_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, 1);
/* Remove the (mac, bd_index) entry from the MAC/BD hash (is_add = 0) */
187 gbp_endpoint_del_mac (const mac_address_t * mac, u32 bd_index)
189 clib_bihash_kv_16_8_t key;
191 gbp_endpoint_mk_key_mac (mac->bytes, bd_index, &key);
193 clib_bihash_add_del_16_8 (&gbp_ep_db.ged_by_mac_bd, &key, 0);
/* Remove the (ip, fib_index) entry from the IP/RD hash (is_add = 0) */
197 gbp_endpoint_del_ip (const ip46_address_t * ip, u32 fib_index)
199 clib_bihash_kv_24_8_t key;
201 gbp_endpoint_mk_key_ip (ip, fib_index, &key);
203 clib_bihash_add_del_24_8 (&gbp_ep_db.ged_by_ip_rd, &key, 0);
/* Pool index of an endpoint: its offset from the base of gbp_endpoint_pool */
207 gbp_endpoint_index (const gbp_endpoint_t * ge)
209 return (ge - gbp_endpoint_pool);
/*
 * Predicate passed to vec_search_with_function() when searching gek_ips:
 * true when prefix fp carries exactly the address ip.
 */
213 gbp_endpoint_ip_is_equal (const fib_prefix_t * fp, const ip46_address_t * ip)
215 return (ip46_address_is_equal (ip, &fp->fp_addr));
/*
 * Add to the endpoint every address in ips that it does not already hold.
 * Each new address is appended to gek_ips as a prefix and registered in
 * the IP hash under the route-domain's FIB index for that protocol. The
 * endpoint's route-domain key (gek_grd) is set to grd's index.
 *
 * The ASSERT requires that the endpoint is either not yet bound to a
 * route-domain or is already bound to this one.
 */
219 gbp_endpoint_ips_update (gbp_endpoint_t * ge,
220 const ip46_address_t * ips,
221 const gbp_route_domain_t * grd)
223 const ip46_address_t *ip;
226 gei = gbp_endpoint_index (ge);
227 grdi = gbp_route_domain_index (grd);
229 ASSERT ((ge->ge_key.gek_grd == INDEX_INVALID) ||
230 (ge->ge_key.gek_grd == grdi));
232 vec_foreach (ip, ips)
/* ~0 from the search means this address is not yet on the endpoint */
234 if (~0 == vec_search_with_function (ge->ge_key.gek_ips, ip,
235 gbp_endpoint_ip_is_equal))
239 vec_add2 (ge->ge_key.gek_ips, pfx, 1);
240 fib_prefix_from_ip46_addr (ip, pfx);
242 gbp_endpoint_add_ip (&pfx->fp_addr,
243 grd->grd_fib_index[pfx->fp_proto], gei);
245 ge->ge_key.gek_grd = grdi;
/*
 * Allocate a zeroed endpoint from the pool and set up its key.
 * The FIB node is initialised, the route-domain and forwarding FIB index
 * start as INDEX_INVALID, the forwarding interface handle is reset, and
 * the last-seen time is set to now. The bridge-domain index, MAC (also
 * registered in the MAC/BD hash) and IPs are then recorded.
 * NOTE(review): any guards around the MAC and IP registration are on
 * lines not visible in this view.
 */
249 static gbp_endpoint_t *
250 gbp_endpoint_alloc (const ip46_address_t * ips,
251 const gbp_route_domain_t * grd,
252 const mac_address_t * mac,
253 const gbp_bridge_domain_t * gbd)
258 pool_get_zero (gbp_endpoint_pool, ge);
260 fib_node_init (&ge->ge_node, gbp_endpoint_fib_type);
261 gei = gbp_endpoint_index (ge);
263 ge->ge_key.gek_grd = ge->ge_fwd.gef_fib_index = INDEX_INVALID;
264 gbp_itf_hdl_reset (&ge->ge_fwd.gef_itf);
265 ge->ge_last_time = vlib_time_now (vlib_get_main ());
266 ge->ge_key.gek_gbd = gbp_bridge_domain_index (gbd);
270 mac_address_copy (&ge->ge_key.gek_mac, mac);
271 gbp_endpoint_add_mac (mac, gbd->gb_bd_index, gei);
273 gbp_endpoint_ips_update (ge, ips, grd);
/* Two locations match when they come from the same source (gel_src) */
279 gbp_endpoint_loc_is_equal (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
281 return (a->gel_src == b->gel_src);
/*
 * Sort comparator over locations: negative, zero or positive according to
 * the difference of the two gel_src values, giving ascending gel_src order.
 */
285 gbp_endpoint_loc_cmp_for_sort (gbp_endpoint_loc_t * a, gbp_endpoint_loc_t * b)
287 return (a->gel_src - b->gel_src);
/*
 * Find the endpoint's location for a given source. ge_locs is searched
 * with a template location using gbp_endpoint_loc_is_equal (which only
 * compares gel_src); a hit returns a pointer into ge_locs.
 * NOTE(review): the not-found path is on lines not visible here; callers
 * in this file compare the result against NULL.
 */
290 static gbp_endpoint_loc_t *
291 gbp_endpoint_loc_find (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
293 gbp_endpoint_loc_t gel = {
298 pos = vec_search_with_function (ge->ge_locs, &gel,
299 gbp_endpoint_loc_is_equal);
302 return (&ge->ge_locs[pos]);
/*
 * Release a lock on one of the endpoint's locations. When the location's
 * lock count is zero it is deleted from ge_locs, the remaining locations
 * are re-sorted if more than one is left, and one lock on the endpoint's
 * FIB node is released.
 */
308 gbp_endpoint_loc_unlock (gbp_endpoint_t * ge, gbp_endpoint_loc_t * gel)
314 if (0 == gel->gel_locks)
/* position of gel within the vector, by pointer difference */
316 pos = gel - ge->ge_locs;
318 vec_del1 (ge->ge_locs, pos);
319 if (vec_len (ge->ge_locs) > 1)
320 vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
322 /* This could be the last lock, so don't access the EP from
324 fib_node_unlock (&ge->ge_node);
/* Release what a location holds: its EPG lock and its interface handle */
332 gbp_endpoint_loc_destroy (gbp_endpoint_loc_t * gel)
334 gbp_endpoint_group_unlock (gel->gel_epg);
335 gbp_itf_unlock (&gel->gel_itf);
/*
 * Return the endpoint's location for source src, adding one when needed.
 * The template used for a new location has an invalid EPG and an invalid
 * interface handle. After an insert into a vector of more than one
 * element the vector is re-sorted and the position looked up again, since
 * sorting can move the new element. A lock is taken on the endpoint's FIB
 * node, the counterpart of the unlock in gbp_endpoint_loc_unlock().
 * NOTE(review): the conditions guarding the insert and the lock are on
 * lines not visible in this view.
 */
338 static gbp_endpoint_loc_t *
339 gbp_endpoint_loc_find_or_add (gbp_endpoint_t * ge, gbp_endpoint_src_t src)
341 gbp_endpoint_loc_t gel = {
343 .gel_epg = INDEX_INVALID,
344 .gel_itf = GBP_ITF_HDL_INVALID,
349 pos = vec_search_with_function (ge->ge_locs, &gel,
350 gbp_endpoint_loc_is_equal);
354 vec_add1 (ge->ge_locs, gel);
356 if (vec_len (ge->ge_locs) > 1)
358 vec_sort_with_function (ge->ge_locs, gbp_endpoint_loc_cmp_for_sort);
360 pos = vec_search_with_function (ge->ge_locs, &gel,
361 gbp_endpoint_loc_is_equal);
367 * it's the sources and children that lock the endpoints
369 fib_node_lock (&ge->ge_node);
372 return (&ge->ge_locs[pos]);
376 * Find an EP in the DBs and check that if we find it in the L2 DB
377 * it has the same IPs as this update
380 gbp_endpoint_find_for_update (const ip46_address_t * ips,
381 const gbp_route_domain_t * grd,
382 const mac_address_t * mac,
383 const gbp_bridge_domain_t * gbd,
384 gbp_endpoint_t ** ge)
386 gbp_endpoint_t *l2_ge, *l3_ge, *tmp;
388 l2_ge = l3_ge = NULL;
/* L2 lookup: only attempted for a non-NULL, non-zero MAC */
390 if (NULL != mac && !mac_address_is_zero (mac))
393 l2_ge = gbp_endpoint_find_mac (mac->bytes, gbd->gb_bd_index);
/* L3 lookup: each address is searched in the FIB of its own protocol */
395 if (NULL != ips && !ip46_address_is_zero (ips))
397 const ip46_address_t *ip;
398 fib_protocol_t fproto;
401 vec_foreach (ip, ips)
403 fproto = fib_proto_from_ip46 (ip46_address_get_type (ip));
405 tmp = gbp_endpoint_find_ip (ip, grd->grd_fib_index[fproto]);
410 else if (NULL == l3_ge)
411 /* first match against an IP address */
413 else if (tmp == l3_ge)
414 /* another match against IP address that is the same endpoint */
419 * a match against a different endpoint.
420 * this means the KEY of the EP is changing which is not allowed
/* reconcile the L2 and L3 results into the single endpoint returned via ge */
427 if (NULL == l2_ge && NULL == l3_ge)
430 else if (NULL == l2_ge)
433 else if (NULL == l3_ge)
438 /* found both L3 and L2 - they must be the same else the KEY
/*
 * Source of the endpoint's first location (the vector is kept sorted by
 * source), or GBP_ENDPOINT_SRC_MAX when the endpoint has no locations.
 */
450 static gbp_endpoint_src_t
451 gbp_endpoint_get_best_src (const gbp_endpoint_t * ge)
453 if (0 == vec_len (ge->ge_locs))
454 return (GBP_ENDPOINT_SRC_MAX);
456 return (ge->ge_locs[0].gel_src);
/*
 * Adjust the count of learnt endpoints by n (which may be negative) and
 * drive the scanner process: SCAN_START is signalled when an increase
 * brings the count to 1, SCAN_STOP when a decrease brings it to 0.
 */
460 gbp_endpoint_n_learned (int n)
462 gbp_n_learnt_endpoints += n;
464 if (n > 0 && 1 == gbp_n_learnt_endpoints)
466 vlib_process_signal_event (vlib_get_main (),
467 gbp_scanner_node.index,
468 GBP_ENDPOINT_SCAN_START, 0);
470 if (n < 0 && 0 == gbp_n_learnt_endpoints)
472 vlib_process_signal_event (vlib_get_main (),
473 gbp_scanner_node.index,
474 GBP_ENDPOINT_SCAN_STOP, 0);
/*
 * Refresh a location with the attributes supplied by its source.
 *
 * @param ge       endpoint that owns the location
 * @param gel      location to update
 * @param gb       endpoint's bridge-domain; its VNI is used to find the
 *                 parent tunnel when the location was learnt via multicast
 * @param flags    new flags for the location; REMOTE marks it as learnt
 * @param tun_src  tunnel source address, copied for REMOTE locations
 * @param tun_dst  tunnel destination address, copied for REMOTE locations
 * (the input interface sw_if_index and the EPG index ggi are also used;
 *  their declarations are on lines not visible in this view)
 *
 * The learnt-endpoint counter is adjusted by the change in the REMOTE
 * flag. The lock on the new EPG is taken before the old one is released.
 * The location's interface handle is then re-derived from the input
 * interface.
 */
479 gbp_endpoint_loc_update (const gbp_endpoint_t * ge,
480 gbp_endpoint_loc_t * gel,
481 const gbp_bridge_domain_t * gb,
484 gbp_endpoint_flags_t flags,
485 const ip46_address_t * tun_src,
486 const ip46_address_t * tun_dst)
488 int was_learnt, is_learnt;
/* is_learnt - was_learnt is +1, 0 or -1 depending on how REMOTE changed */
491 was_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
492 gel->gel_flags = flags;
493 is_learnt = ! !(gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE);
495 gbp_endpoint_n_learned (is_learnt - was_learnt);
500 gbp_endpoint_group_lock (ggi);
501 gbp_endpoint_group_unlock (gel->gel_epg);
504 if (gel->gel_flags & GBP_ENDPOINT_FLAG_REMOTE)
507 ip46_address_copy (&gel->tun.gel_src, tun_src);
509 ip46_address_copy (&gel->tun.gel_dst, tun_dst);
511 if (ip46_address_is_multicast (&gel->tun.gel_src))
514 * we learnt the EP from the multicast tunnel.
515 * Create a unicast TEP from the packet's source
516 * and the fixed address of the BD's parent tunnel
518 const gbp_vxlan_tunnel_t *gt;
520 gt = gbp_vxlan_tunnel_get (gb->gb_vni);
/* second argument is the address of the parent tunnel's source, gt->gt_src */
524 ip46_address_copy (&gel->tun.gel_src, &gt->gt_src);
525 sw_if_index = gt->gt_sw_if_index;
530 * the input interface may be the parent GBP-vxlan interface,
531 * create a child vxlan-gbp tunnel and use that as the endpoint's
/* remember the previous handle so it can be released after re-locking */
534 gbp_itf_hdl_t old = gel->gel_itf;
536 switch (gbp_vxlan_tunnel_get_type (sw_if_index))
538 case GBP_VXLAN_TEMPLATE_TUNNEL:
539 gel->tun.gel_parent_sw_if_index = sw_if_index;
540 gel->gel_itf = gbp_vxlan_tunnel_clone_and_lock (sw_if_index,
544 case VXLAN_GBP_TUNNEL:
545 gel->tun.gel_parent_sw_if_index =
546 vxlan_gbp_tunnel_get_parent (sw_if_index);
547 gel->gel_itf = vxlan_gbp_tunnel_lock_itf (sw_if_index);
551 gbp_itf_unlock (&old);
555 gel->gel_itf = gbp_itf_l2_add_and_lock (sw_if_index,
/*
 * Undo the forwarding state installed for an endpoint. For each of its
 * IP prefixes the bridge-domain IP-to-MAC binding is removed (last
 * argument 0) and the FIB entries sourced by gbp_fib_source_hi (remote
 * endpoints) and gbp_fib_source_low are removed. If the forwarding
 * interface handle is valid the L2 FIB entry for the MAC is deleted. The
 * interface handle is then unlocked and the adjacency vector freed.
 */
561 gbb_endpoint_fwd_reset (gbp_endpoint_t * ge)
563 const gbp_route_domain_t *grd;
564 const gbp_bridge_domain_t *gbd;
565 gbp_endpoint_fwd_t *gef;
566 const fib_prefix_t *pfx;
569 gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
572 vec_foreach (pfx, ge->ge_key.gek_ips)
576 grd = gbp_route_domain_get (ge->ge_key.gek_grd);
577 fib_index = grd->grd_fib_index[pfx->fp_proto];
579 bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
580 &pfx->fp_addr, &ge->ge_key.gek_mac, 0);
583 * remove a host route
585 if (gbp_endpoint_is_remote (ge))
587 fib_table_entry_special_remove (fib_index, pfx, gbp_fib_source_hi);
590 fib_table_entry_delete (fib_index, pfx, gbp_fib_source_low);
592 vec_foreach (ai, gef->gef_adjs)
597 if (gbp_itf_hdl_is_valid (gef->gef_itf))
599 l2fib_del_entry (ge->ge_key.gek_mac.bytes,
601 gbp_itf_get_sw_if_index (gef->gef_itf));
604 gbp_itf_unlock (&gef->gef_itf);
605 vec_free (gef->gef_adjs);
/*
 * Rebuild an endpoint's forwarding state from its best location, i.e.
 * ge_locs[0].
 *
 * Steps visible here:
 *  - copy the location's flags and, when it has an EPG, the EPG's sclass
 *  - clone and lock the location's interface as the forwarding interface
 *  - for a non-zero MAC: enable the L2 input/output GBP features and add
 *    a static L2 FIB entry
 *  - for each IP: add the BD IP-to-MAC binding, a host route and an
 *    adjacency with an ethernet rewrite; remote endpoints additionally
 *    get an interposed policy DPO; local endpoints with an uplink send a
 *    gratuitous ARP / neighbour advertisement
 *  - select the L2 input classification features by endpoint type
 *  - back-walk the endpoint's children so they pick up the new state
 *
 * gg is only assigned when the location has a valid EPG, so uses of gg
 * outside that branch must tolerate the endpoint having no EPG.
 */
609 gbb_endpoint_fwd_recalc (gbp_endpoint_t * ge)
611 const gbp_bridge_domain_t *gbd;
612 const gbp_endpoint_group_t *gg;
613 const gbp_route_domain_t *grd;
614 gbp_endpoint_loc_t *gel;
615 gbp_endpoint_fwd_t *gef;
616 const fib_prefix_t *pfx;
620 * locations are sorted in source priority order
622 gei = gbp_endpoint_index (ge);
623 gel = &ge->ge_locs[0];
625 gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
627 gef->gef_flags = gel->gel_flags;
629 if (INDEX_INVALID != gel->gel_epg)
631 gg = gbp_endpoint_group_get (gel->gel_epg);
632 gef->gef_sclass = gg->gg_sclass;
639 gef->gef_itf = gbp_itf_clone_and_lock (gel->gel_itf);
641 if (!mac_address_is_zero (&ge->ge_key.gek_mac))
643 gbp_itf_l2_set_input_feature (gef->gef_itf, L2INPUT_FEAT_GBP_FWD);
645 if (gbp_endpoint_is_remote (ge) || gbp_endpoint_is_external (ge))
648 * bridged packets to external endpoints should be classified
649 * based on the EP's/BD's EPG
651 gbp_itf_l2_set_output_feature (gef->gef_itf,
652 L2OUTPUT_FEAT_GBP_POLICY_MAC);
656 gbp_endpoint_add_itf (gbp_itf_get_sw_if_index (gef->gef_itf), gei);
657 gbp_itf_l2_set_output_feature (gef->gef_itf,
658 L2OUTPUT_FEAT_GBP_POLICY_PORT);
660 l2fib_add_entry (ge->ge_key.gek_mac.bytes,
662 gbp_itf_get_sw_if_index (gef->gef_itf),
663 L2FIB_ENTRY_RESULT_FLAG_STATIC);
666 vec_foreach (pfx, ge->ge_key.gek_ips)
668 ethernet_header_t *eth;
675 grd = gbp_route_domain_get (ge->ge_key.gek_grd);
676 fib_index = grd->grd_fib_index[pfx->fp_proto];
677 gef->gef_fib_index = fib_index;
679 bd_add_del_ip_mac (gbd->gb_bd_index, fib_proto_to_ip46 (pfx->fp_proto),
680 &pfx->fp_addr, &ge->ge_key.gek_mac, 1);
683 * add a host route via the EPG's BVI we need this because the
684 * adj fib does not install, due to cover refinement check, since
685 * the BVI's prefix is /32
/* size the rewrite vector to hold exactly one ethernet header */
687 vec_validate (rewrite, sizeof (*eth) - 1);
688 eth = (ethernet_header_t *) rewrite;
690 eth->type = clib_host_to_net_u16 ((pfx->fp_proto == FIB_PROTOCOL_IP4 ?
694 if (gbp_endpoint_is_remote (ge))
697 * for dynamic EPs we must add the IP adjacency via the learned
698 * tunnel since the BD will not contain the EP's MAC since it was
699 * L3 learned. The dst MAC address used is the 'BD's MAC'.
701 ip_sw_if_index = gbp_itf_get_sw_if_index (gef->gef_itf);
703 mac_address_to_bytes (gbp_route_domain_get_local_mac (),
705 mac_address_to_bytes (gbp_route_domain_get_remote_mac (),
711 * for the static EPs we add the IP adjacency via the BVI
712 * knowing that the BD has the MAC address to route to and
713 * that policy will be applied on egress to the EP's port
715 ip_sw_if_index = gbd->gb_bvi_sw_if_index;
717 clib_memcpy (eth->src_address,
718 vnet_sw_interface_get_hw_address (vnet_get_main (),
720 sizeof (eth->src_address));
721 mac_address_to_bytes (&ge->ge_key.gek_mac, eth->dst_address);
724 fib_table_entry_path_add (fib_index, pfx,
727 fib_proto_to_dpo (pfx->fp_proto),
728 &pfx->fp_addr, ip_sw_if_index,
729 ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
731 ai = adj_nbr_add_or_lock_w_rewrite (pfx->fp_proto,
732 fib_proto_to_link (pfx->fp_proto),
734 ip_sw_if_index, rewrite);
735 vec_add1 (gef->gef_adjs, ai);
738 * if the endpoint is external then routed packet to it must be
739 * classified to the BD's EPG. but this will happen anyway with
740 * the GBP_MAC classification.
745 if (gbp_endpoint_is_remote (ge))
747 dpo_id_t policy_dpo = DPO_INVALID;
750 * interpose a policy DPO from the endpoint so that policy
/* NOTE(review): gg is dereferenced here without a NULL check; confirm a
   remote endpoint always has a valid EPG on its best location */
753 gbp_policy_dpo_add_or_lock (fib_proto_to_dpo (pfx->fp_proto),
755 gg->gg_sclass, ~0, &policy_dpo);
757 fib_table_entry_special_dpo_add (fib_index, pfx,
759 FIB_ENTRY_FLAG_INTERPOSE,
761 dpo_reset (&policy_dpo);
765 * send a gratuitous ARP on the EPG's uplink. this is done so
766 * that if this EP has moved from some other place in the
767 * 'fabric', upstream devices are informed
/* an endpoint without an EPG has no uplink to advertise on */
769 if (gbp_endpoint_is_local (ge) && NULL != gg && ~0 != gg->gg_uplink_sw_if_index)
771 gbp_endpoint_add_itf (gbp_itf_get_sw_if_index (gef->gef_itf),
773 if (FIB_PROTOCOL_IP4 == pfx->fp_proto)
774 ip4_neighbor_advertise (
775 vlib_get_main (), vnet_get_main (), gg->gg_uplink_sw_if_index,
776 vlib_get_thread_index (), &pfx->fp_addr.ip4);
778 ip6_neighbor_advertise (
779 vlib_get_main (), vnet_get_main (), gg->gg_uplink_sw_if_index,
780 vlib_get_thread_index (), &pfx->fp_addr.ip6);
785 if (gbp_endpoint_is_external (ge))
787 gbp_itf_l2_set_input_feature (gef->gef_itf,
788 L2INPUT_FEAT_GBP_LPM_CLASSIFY);
790 else if (gbp_endpoint_is_local (ge))
793 * non-remote endpoints (i.e. those not arriving on iVXLAN
794 * tunnels) need to be classified based on the input interface.
795 * We enable the GBP-FWD feature only if the group has an uplink
796 * interface (on which the GBP-FWD feature would send UU traffic).
797 * External endpoints get classified based on an LPM match
799 l2input_feat_masks_t feats = L2INPUT_FEAT_GBP_SRC_CLASSIFY;
801 if (NULL != gg && ~0 != gg->gg_uplink_sw_if_index)
802 feats |= L2INPUT_FEAT_GBP_FWD;
803 gbp_itf_l2_set_input_feature (gef->gef_itf, feats);
807 * update children with the new forwarding info
809 fib_node_back_walk_ctx_t bw_ctx = {
810 .fnbw_reason = FIB_NODE_BW_REASON_FLAG_EVALUATE,
811 .fnbw_flags = FIB_NODE_BW_FLAG_FORCE_SYNC,
814 fib_walk_sync (gbp_endpoint_fib_type, gei, &bw_ctx);
/*
 * Create or update an endpoint on behalf of source src and take that
 * source's lock on it.
 *
 * Failure returns visible here:
 *  - VNET_API_ERROR_INVALID_SW_IF_INDEX when sw_if_index is ~0
 *  - VNET_API_ERROR_NO_SUCH_ENTRY when an sclass is given but no EPG
 *    matches it, or when the bridge-domain index is invalid
 *  - VNET_API_ERROR_NO_SUCH_FIB when the route-domain index is invalid
 *
 * The endpoint is looked up by MAC and IPs; a new one is allocated or the
 * existing one gains any new IPs. The source's location is then found or
 * added and updated, and forwarding is reset and recalculated when the
 * best source is affected.
 * NOTE(review): the conditions that select between these paths, and the
 * write to *handle, are on lines not visible in this view.
 */
818 gbp_endpoint_update_and_lock (gbp_endpoint_src_t src,
820 const ip46_address_t * ips,
821 const mac_address_t * mac,
822 index_t gbdi, index_t grdi,
824 gbp_endpoint_flags_t flags,
825 const ip46_address_t * tun_src,
826 const ip46_address_t * tun_dst, u32 * handle)
828 gbp_bridge_domain_t *gbd;
829 gbp_endpoint_group_t *gg;
830 gbp_endpoint_src_t best;
831 gbp_route_domain_t *grd;
832 gbp_endpoint_loc_t *gel;
837 if (~0 == sw_if_index)
838 return (VNET_API_ERROR_INVALID_SW_IF_INDEX);
844 * we need to determine the bridge-domain, either from the EPG or
847 if (SCLASS_INVALID != sclass)
849 ggi = gbp_endpoint_group_find (sclass);
851 if (INDEX_INVALID == ggi)
852 return (VNET_API_ERROR_NO_SUCH_ENTRY);
854 gg = gbp_endpoint_group_get (ggi);
860 if (INDEX_INVALID == gbdi)
861 return (VNET_API_ERROR_NO_SUCH_ENTRY);
862 if (INDEX_INVALID == grdi)
863 return (VNET_API_ERROR_NO_SUCH_FIB);
867 gbd = gbp_bridge_domain_get (gbdi);
868 grd = gbp_route_domain_get (grdi);
869 rv = gbp_endpoint_find_for_update (ips, grd, mac, gbd, &ge);
876 ge = gbp_endpoint_alloc (ips, grd, mac, gbd);
880 gbp_endpoint_ips_update (ge, ips, grd);
/* capture the best source BEFORE this source's location is added */
883 best = gbp_endpoint_get_best_src (ge);
884 gei = gbp_endpoint_index (ge);
885 gel = gbp_endpoint_loc_find_or_add (ge, src);
887 gbp_endpoint_loc_update (ge, gel, gbd, sw_if_index, ggi, flags,
893 * either the best source has been updated or we have a new best source
895 gbb_endpoint_fwd_reset (ge);
896 gbb_endpoint_fwd_recalc (ge);
901 * an update to a lower priority source, so we need do nothing
908 GBP_ENDPOINT_INFO ("update: %U", format_gbp_endpoint, gei);
/*
 * Release source src's lock on endpoint gei. A free pool index is tested
 * for up front. The endpoint is locked for the duration of the call so
 * the point of deletion is controlled, and the location is copied before
 * it is removed so its resources can be released afterwards. Forwarding
 * is reset when no sources remain, or reset and recalculated when other
 * sources remain. The final fib_node_unlock may free the endpoint.
 * NOTE(review): the tests on 'removed' and 'best' that select these paths
 * are on lines not visible in this view.
 */
914 gbp_endpoint_unlock (gbp_endpoint_src_t src, index_t gei)
916 gbp_endpoint_loc_t *gel, gel_copy;
917 gbp_endpoint_src_t best;
921 if (pool_is_free_index (gbp_endpoint_pool, gei))
924 GBP_ENDPOINT_INFO ("delete: %U", format_gbp_endpoint, gei);
926 ge = gbp_endpoint_get (gei);
928 gel = gbp_endpoint_loc_find (ge, src);
934 * lock the EP so we can control when it is deleted
936 fib_node_lock (&ge->ge_node);
937 best = gbp_endpoint_get_best_src (ge);
940 * copy the location info since we'll lose it when it's removed from
943 clib_memcpy (&gel_copy, gel, sizeof (gel_copy));
946 * remove the source we no longer need
948 removed = gbp_endpoint_loc_unlock (ge, gel);
953 * we have removed the old best source => recalculate fwding
955 if (0 == vec_len (ge->ge_locs))
958 * if there are no more sources left, then we need only release
959 * the fwding resources held and then this EP is gawn.
961 gbb_endpoint_fwd_reset (ge);
966 * else there are more sources. release the old and get new
969 gbb_endpoint_fwd_reset (ge);
970 gbb_endpoint_fwd_recalc (ge);
975 * we removed a lower priority source so we need to do nothing
979 * clear up any resources held by the source
982 gbp_endpoint_loc_destroy (&gel_copy);
985 * remove the lock taken above
987 fib_node_unlock (&ge->ge_node);
989 * We may have removed the last source and so this EP is now TOAST
990 * DO NOTHING BELOW HERE
/*
 * Attach the FIB node (type, index) as a child of endpoint gei and return
 * whatever fib_node_child_add() returns for that attachment.
 */
995 gbp_endpoint_child_add (index_t gei,
996 fib_node_type_t type, fib_node_index_t index)
998 return (fib_node_child_add (gbp_endpoint_fib_type, gei, type, index));
/* Detach the child identified by 'sibling' from endpoint gei */
1002 gbp_endpoint_child_remove (index_t gei, u32 sibling)
1004 return (fib_node_child_remove (gbp_endpoint_fib_type, gei, sibling));
/*
 * Walk context for gbp_endpoint_flush(): the source and parent interface
 * to match, plus the vector of matching endpoint indices (geis) that the
 * callback fills in.
 */
1007 typedef struct gbp_endpoint_flush_ctx_t_
1010 gbp_endpoint_src_t src;
1012 } gbp_endpoint_flush_ctx_t;
/*
 * Walk callback for gbp_endpoint_flush(). Appends gei to ctx->geis when
 * the endpoint has a location from ctx->src whose tunnel parent interface
 * equals ctx->sw_if_index. Always asks the walk to continue.
 */
1015 gbp_endpoint_flush_cb (index_t gei, void *args)
1017 gbp_endpoint_flush_ctx_t *ctx = args;
1018 gbp_endpoint_loc_t *gel;
1021 ge = gbp_endpoint_get (gei);
1022 gel = gbp_endpoint_loc_find (ge, ctx->src);
1024 if ((NULL != gel) && ctx->sw_if_index == gel->tun.gel_parent_sw_if_index)
1026 vec_add1 (ctx->geis, gei);
1029 return (WALK_CONTINUE);
1033 * remove all learnt endpoints using the interface
1036 gbp_endpoint_flush (gbp_endpoint_src_t src, u32 sw_if_index)
1038 gbp_endpoint_flush_ctx_t ctx = {
1039 .sw_if_index = sw_if_index,
1044 GBP_ENDPOINT_INFO ("flush: %U %U",
1045 format_gbp_endpoint_src, src,
1046 format_vnet_sw_if_index_name, vnet_get_main (),
1048 gbp_endpoint_walk (gbp_endpoint_flush_cb, &ctx);
/* unlock only after the walk has finished, using the indices it gathered */
1050 vec_foreach (gei, ctx.geis)
1052 gbp_endpoint_unlock (src, *gei);
1055 vec_free (ctx.geis);
/*
 * Invoke cb with ctx for every in-use index of the endpoint pool.
 * NOTE(review): a zero return from cb is tested for; presumably it ends
 * the walk, but the statement taken in that case is not visible here.
 */
1059 gbp_endpoint_walk (gbp_endpoint_cb_t cb, void *ctx)
1064 pool_foreach_index (index, gbp_endpoint_pool)
1066 if (!cb(index, ctx))
/*
 * CLI handler for "gbp endpoint". Parses an interface, add/del, sclass,
 * handle, ip (v4 or v6), mac and hex flags.
 * For an add, the interface and sclass are mandatory and the endpoint is
 * created from the control-plane source; the resulting handle is printed.
 * For a delete, the handle is mandatory and the control-plane source's
 * lock on it is released.
 * NOTE(review): the statements run on each successful unformat match, and
 * the add/del selection, are on lines not visible in this view.
 */
1072 static clib_error_t *
1073 gbp_endpoint_cli (vlib_main_t * vm,
1074 unformat_input_t * input, vlib_cli_command_t * cmd)
1076 ip46_address_t ip = ip46_address_initializer, *ips = NULL;
1077 mac_address_t mac = ZERO_MAC_ADDRESS;
1078 vnet_main_t *vnm = vnet_get_main ();
1079 u32 sclass = SCLASS_INVALID;
1080 u32 handle = INDEX_INVALID;
1081 u32 sw_if_index = ~0;
1082 u32 flags = GBP_ENDPOINT_FLAG_NONE;
1086 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
/* start each iteration from a clean address so v4 and v6 parses don't mix */
1088 ip46_address_reset (&ip);
1090 if (unformat (input, "%U", unformat_vnet_sw_interface,
1093 else if (unformat (input, "add"))
1095 else if (unformat (input, "del"))
1097 else if (unformat (input, "sclass %d", &sclass))
1099 else if (unformat (input, "handle %d", &handle))
1101 else if (unformat (input, "ip %U", unformat_ip4_address, &ip.ip4))
1103 else if (unformat (input, "ip %U", unformat_ip6_address, &ip.ip6))
1105 else if (unformat (input, "mac %U", unformat_mac_address, &mac))
1107 else if (unformat (input, "flags 0x%x", &flags))
1115 if (~0 == sw_if_index)
1116 return clib_error_return (0, "interface must be specified");
1117 if (SCLASS_INVALID == sclass)
1118 return clib_error_return (0, "SCLASS must be specified");
/* BD and RD are passed as INDEX_INVALID; sclass is guaranteed valid here */
1121 gbp_endpoint_update_and_lock (GBP_ENDPOINT_SRC_CP,
1122 sw_if_index, ips, &mac,
1123 INDEX_INVALID, INDEX_INVALID,
1124 sclass, flags, NULL, NULL, &handle);
1127 return clib_error_return (0, "GBP Endpoint update returned %d", rv);
1129 vlib_cli_output (vm, "handle %d\n", handle);
1133 if (INDEX_INVALID == handle)
1134 return clib_error_return (0, "handle must be specified");
1136 gbp_endpoint_unlock (GBP_ENDPOINT_SRC_CP, handle);
1145 * Configure a GBP Endpoint
1148 * @cliexstart{gbp endpoint del <handle> | [add] <interface> sclass <SCLASS> ip <IP> mac <MAC> [flags <flags>]}
1152 VLIB_CLI_COMMAND (gbp_endpoint_cli_node, static) = {
/* binds the "gbp endpoint" command path to gbp_endpoint_cli() */
1153 .path = "gbp endpoint",
1154 .short_help = "gbp endpoint del <handle> | [add] <interface> sclass <SCLASS> ip <IP> mac <MAC> [flags <flags>]",
1155 .function = gbp_endpoint_cli,
/*
 * format() helper taking a gbp_endpoint_src_t. The case list is generated
 * from foreach_gbp_endpoint_src, each case formatting that source's
 * string; a value not in the list formats as "unknown".
 */
1160 format_gbp_endpoint_src (u8 * s, va_list * args)
1162 gbp_endpoint_src_t action = va_arg (*args, gbp_endpoint_src_t);
1166 #define _(v,a) case GBP_ENDPOINT_SRC_##v: return (format (s, "%s", a));
1167 foreach_gbp_endpoint_src
1171 return (format (s, "unknown"));
/*
 * format() helper taking a gbp_endpoint_fwd_t pointer. Prints the
 * forwarding interface handle and, when any flag is set, the flags.
 */
1175 format_gbp_endpoint_fwd (u8 * s, va_list * args)
1177 gbp_endpoint_fwd_t *gef = va_arg (*args, gbp_endpoint_fwd_t *);
1179 s = format (s, "fwd:");
1180 s = format (s, "\n itf:[%U]", format_gbp_itf_hdl, gef->gef_itf);
1181 if (GBP_ENDPOINT_FLAG_NONE != gef->gef_flags)
1183 s = format (s, " flags:%U", format_gbp_endpoint_flags, gef->gef_flags);
/*
 * format() helper taking a gbp_endpoint_key_t pointer. Prints the key's
 * IP prefixes as a bracketed, comma-separated list followed by its MAC.
 */
1190 format_gbp_endpoint_key (u8 * s, va_list * args)
1192 gbp_endpoint_key_t *gek = va_arg (*args, gbp_endpoint_key_t *);
1193 const fib_prefix_t *pfx;
1195 s = format (s, "ips:[");
1197 vec_foreach (pfx, gek->gek_ips)
1199 s = format (s, "%U, ", format_fib_prefix, pfx);
1201 s = format (s, "]");
1203 s = format (s, " mac:%U", format_mac_address_t, &gek->gek_mac);
/*
 * format() helper taking a gbp_endpoint_loc_t pointer. Prints the source,
 * the EPG index and interface handle, the flags when any are set, and for
 * REMOTE locations the parent interface and tunnel source/destination.
 */
1209 format_gbp_endpoint_loc (u8 * s, va_list * args)
1211 gbp_endpoint_loc_t *gel = va_arg (*args, gbp_endpoint_loc_t *);
1213 s = format (s, "%U", format_gbp_endpoint_src, gel->gel_src);
1214 s = format (s, "\n EPG:%d [%U]", gel->gel_epg,
1215 format_gbp_itf_hdl, gel->gel_itf);
1217 if (GBP_ENDPOINT_FLAG_NONE != gel->gel_flags)
1219 s = format (s, " flags:%U", format_gbp_endpoint_flags, gel->gel_flags);
1221 if (GBP_ENDPOINT_FLAG_REMOTE & gel->gel_flags)
1223 s = format (s, " tun:[");
1224 s = format (s, "parent:%U", format_vnet_sw_if_index_name,
1225 vnet_get_main (), gel->tun.gel_parent_sw_if_index);
1226 s = format (s, " {%U,%U}]",
1227 format_ip46_address, &gel->tun.gel_src, IP46_TYPE_ANY,
1228 format_ip46_address, &gel->tun.gel_dst, IP46_TYPE_ANY);
/*
 * format() helper taking an endpoint pool index. Prints the index, key
 * and last-seen time, then each location, then the forwarding state.
 * NOTE(review): the index is passed straight to gbp_endpoint_get(); no
 * check that it refers to an in-use pool entry is visible here.
 */
1235 format_gbp_endpoint (u8 * s, va_list * args)
1237 index_t gei = va_arg (*args, index_t);
1238 gbp_endpoint_loc_t *gel;
1241 ge = gbp_endpoint_get (gei);
1243 s = format (s, "[@%d] %U", gei, format_gbp_endpoint_key, &ge->ge_key);
1244 s = format (s, " last-time:[%f]", ge->ge_last_time);
1246 vec_foreach (gel, ge->ge_locs)
1248 s = format (s, "\n %U", format_gbp_endpoint_loc, gel);
1250 s = format (s, "\n %U", format_gbp_endpoint_fwd, &ge->ge_fwd);
/* Walk callback: print one endpoint on the CLI and continue the walk */
1256 gbp_endpoint_show_one (index_t gei, void *ctx)
1261 vlib_cli_output (vm, " %U", format_gbp_endpoint, gei);
1263 return (WALK_CONTINUE);
/*
 * Hash-walk callback for the IP/RD table: prints one entry as
 * {address, interface-name} -> value and continues the walk.
 * NOTE(review): the index from the key is printed as an interface name,
 * but gbp_endpoint_add_ip() keys entries on a fib_index - confirm.
 */
1267 gbp_endpoint_walk_ip_itf (clib_bihash_kv_24_8_t * kvp, void *arg)
1275 gbp_endpoint_extract_key_ip_itf (kvp, &ip, &sw_if_index);
1277 vlib_cli_output (vm, " {%U, %U} -> %d",
1278 format_ip46_address, &ip, IP46_TYPE_ANY,
1279 format_vnet_sw_if_index_name, vnet_get_main (),
1280 sw_if_index, kvp->value);
1281 return (BIHASH_WALK_CONTINUE);
/*
 * Hash-walk callback for the MAC/BD table: prints one entry as
 * {mac, interface-name} -> value and continues the walk.
 * NOTE(review): the index from the key is printed as an interface name,
 * but gbp_endpoint_add_mac() keys entries on a bd_index - confirm.
 */
1285 gbp_endpoint_walk_mac_itf (clib_bihash_kv_16_8_t * kvp, void *arg)
1293 gbp_endpoint_extract_key_mac_itf (kvp, &mac, &sw_if_index);
1295 vlib_cli_output (vm, " {%U, %U} -> %d",
1296 format_mac_address_t, &mac,
1297 format_vnet_sw_if_index_name, vnet_get_main (),
1298 sw_if_index, kvp->value);
1299 return (BIHASH_WALK_CONTINUE);
/*
 * CLI handler for "show gbp endpoint". Accepts an optional numeric handle
 * or the keyword "db". With a handle, that one endpoint is printed;
 * otherwise either both hash databases are dumped or all endpoints are
 * listed.
 * NOTE(review): the user-supplied handle reaches format_gbp_endpoint
 * without any visible check that it is an in-use pool index.
 */
1302 static clib_error_t *
1303 gbp_endpoint_show (vlib_main_t * vm,
1304 unformat_input_t * input, vlib_cli_command_t * cmd)
1306 u32 show_dbs, handle;
1308 handle = INDEX_INVALID;
1311 while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
1313 if (unformat (input, "%d", &handle))
1315 else if (unformat (input, "db"))
1321 if (INDEX_INVALID != handle)
1323 vlib_cli_output (vm, "%U", format_gbp_endpoint, handle);
1327 vlib_cli_output (vm, "\nDatabases:");
1328 clib_bihash_foreach_key_value_pair_24_8 (&gbp_ep_db.ged_by_ip_rd,
1329 gbp_endpoint_walk_ip_itf, vm);
1330 clib_bihash_foreach_key_value_pair_16_8
1331 (&gbp_ep_db.ged_by_mac_bd, gbp_endpoint_walk_mac_itf, vm);
1335 vlib_cli_output (vm, "Endpoints:");
1336 gbp_endpoint_walk (gbp_endpoint_show_one, vm);
1343 * Show Group Based Policy Endpoints and derived information
1346 * @cliexstart{show gbp endpoint}
1350 VLIB_CLI_COMMAND (gbp_endpoint_show_node, static) = {
/* binds the "show gbp endpoint" command path to gbp_endpoint_show() */
1351 .path = "show gbp endpoint",
1352 .short_help = "show gbp endpoint\n",
1353 .function = gbp_endpoint_show,
/*
 * Age out one endpoint's data-plane location. When the time between
 * start_time and the endpoint's last-seen time exceeds its EPG's
 * remote_ep_timeout, the data-plane source's lock is released.
 * NOTE(review): gel is dereferenced for gel_epg; a NULL test for
 * endpoints with no data-plane location is presumably on a line not
 * visible here - confirm.
 */
1358 gbp_endpoint_check (index_t gei, f64 start_time)
1360 gbp_endpoint_group_t *gg;
1361 gbp_endpoint_loc_t *gel;
1364 ge = gbp_endpoint_get (gei);
1365 gel = gbp_endpoint_loc_find (ge, GBP_ENDPOINT_SRC_DP);
1369 gg = gbp_endpoint_group_get (gel->gel_epg);
1371 if ((start_time - ge->ge_last_time) >
1372 gg->gg_retention.remote_ep_timeout)
1374 gbp_endpoint_unlock (GBP_ENDPOINT_SRC_DP, gei);
/*
 * Walk every bucket of the MAC/BD hash and run gbp_endpoint_check() on
 * the value of each in-use key/value pair. Nothing is scanned if the
 * table is not instantiated. To bound the time spent per run the process
 * suspends for 100us whenever more than 20us have passed since the last
 * pause. Checking an entry can delete it, so the bucket is re-tested for
 * emptiness after each check.
 */
1380 gbp_endpoint_scan_l2 (vlib_main_t * vm)
1382 clib_bihash_16_8_t *gte_table = &gbp_ep_db.ged_by_mac_bd;
1383 f64 last_start, start_time, delta_t;
1386 if (!gte_table->instantiated)
1390 last_start = start_time = vlib_time_now (vm);
1392 for (i = 0; i < gte_table->nbuckets; i++)
1394 clib_bihash_bucket_16_8_t *b;
1395 clib_bihash_value_16_8_t *v;
1397 /* allow no more than 20us without a pause */
1398 delta_t = vlib_time_now (vm) - last_start;
1399 if (delta_t > 20e-6)
1401 /* suspend for 100 us */
1402 vlib_process_suspend (vm, 100e-6);
1403 last_start = vlib_time_now (vm);
1406 b = clib_bihash_get_bucket_16_8 (gte_table, i);
1407 if (clib_bihash_bucket_is_empty_16_8 (b))
1409 v = clib_bihash_get_value_16_8 (gte_table, b->offset);
1411 for (j = 0; j < (1 << b->log2_pages); j++)
1413 for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
1415 if (clib_bihash_is_free_16_8 (&v->kvp[k]))
1418 gbp_endpoint_check (v->kvp[k].value, start_time);
1421 * Note: we may have just freed the bucket's backing
1422 * storage, so check right here...
1424 if (clib_bihash_bucket_is_empty_16_8 (b))
/*
 * Walk every bucket of the IP/RD hash and run gbp_endpoint_check() on the
 * value of each in-use key/value pair. Same structure and pacing as
 * gbp_endpoint_scan_l2(): skip an uninstantiated table, suspend for 100us
 * after more than 20us of work, and re-test the bucket for emptiness
 * after each check since the check can delete entries.
 */
1435 gbp_endpoint_scan_l3 (vlib_main_t * vm)
1437 clib_bihash_24_8_t *gte_table = &gbp_ep_db.ged_by_ip_rd;
1438 f64 last_start, start_time, delta_t;
1441 if (!gte_table->instantiated)
1445 last_start = start_time = vlib_time_now (vm);
1447 for (i = 0; i < gte_table->nbuckets; i++)
1449 clib_bihash_bucket_24_8_t *b;
1450 clib_bihash_value_24_8_t *v;
1452 /* allow no more than 20us without a pause */
1453 delta_t = vlib_time_now (vm) - last_start;
1454 if (delta_t > 20e-6)
1456 /* suspend for 100 us */
1457 vlib_process_suspend (vm, 100e-6);
1458 last_start = vlib_time_now (vm);
1461 b = clib_bihash_get_bucket_24_8 (gte_table, i);
1462 if (clib_bihash_bucket_is_empty_24_8 (b))
1464 v = clib_bihash_get_value_24_8 (gte_table, b->offset);
1466 for (j = 0; j < (1 << b->log2_pages); j++)
1468 for (k = 0; k < BIHASH_KVP_PER_PAGE; k++)
1470 if (clib_bihash_is_free_24_8 (&v->kvp[k]))
1473 gbp_endpoint_check (v->kvp[k].value, start_time);
1476 * Note: we may have just freed the bucket's backing
1477 * storage, so check right here...
1479 if (clib_bihash_bucket_is_empty_24_8 (b))
/* Run one ageing pass: the MAC/BD table first, then the IP/RD table */
1490 gbp_endpoint_scan (vlib_main_t * vm)
1492 gbp_endpoint_scan_l2 (vm);
1493 gbp_endpoint_scan_l3 (vm);
/* FIB vft 'get' hook: map an endpoint index to its embedded FIB node */
1497 gbp_endpoint_get_node (fib_node_index_t index)
1501 ge = gbp_endpoint_get (index);
1503 return (&ge->ge_node);
/*
 * Recover the endpoint from its FIB node by a plain pointer cast, after
 * asserting the node has the GBP endpoint type.
 * NOTE(review): the cast is only valid if ge_node is the first member of
 * gbp_endpoint_t - confirm against the struct definition.
 */
1506 static gbp_endpoint_t *
1507 gbp_endpoint_from_fib_node (fib_node_t * node)
1509 ASSERT (gbp_endpoint_fib_type == node->fn_type);
1510 return ((gbp_endpoint_t *) node);
/*
 * FIB vft 'last lock' hook: the endpoint has no locations left (asserted)
 * and is destroyed. Its MAC entry is removed from the MAC/BD hash when it
 * has a bridge-domain, each of its IPs is removed from the IP/RD hash,
 * and the endpoint is returned to the pool.
 * NOTE(review): gbp_bridge_domain_get() is called on gek_gbd before the
 * INDEX_INVALID test on the same field - confirm that is safe.
 */
1514 gbp_endpoint_last_lock_gone (fib_node_t * node)
1516 const gbp_bridge_domain_t *gbd;
1517 const gbp_route_domain_t *grd;
1518 const fib_prefix_t *pfx;
1521 ge = gbp_endpoint_from_fib_node (node);
1523 ASSERT (0 == vec_len (ge->ge_locs));
1525 gbd = gbp_bridge_domain_get (ge->ge_key.gek_gbd);
1528 * we have removed the last source. this EP is toast
1530 if (INDEX_INVALID != ge->ge_key.gek_gbd)
1532 gbp_endpoint_del_mac (&ge->ge_key.gek_mac, gbd->gb_bd_index);
1534 vec_foreach (pfx, ge->ge_key.gek_ips)
1536 grd = gbp_route_domain_get (ge->ge_key.gek_grd);
1537 gbp_endpoint_del_ip (&pfx->fp_addr, grd->grd_fib_index[pfx->fp_proto]);
1539 pool_put (gbp_endpoint_pool, ge);
/*
 * FIB vft back-walk hook. The only visible action is to let the walk
 * continue.
 */
1542 static fib_node_back_walk_rc_t
1543 gbp_endpoint_back_walk_notify (fib_node_t * node,
1544 fib_node_back_walk_ctx_t * ctx)
1548 return (FIB_NODE_BACK_WALK_CONTINUE);
1552 * The GBP endpoint's FIB graph node virtual function table
1554 static const fib_node_vft_t gbp_endpoint_vft = {
1555 .fnv_get = gbp_endpoint_get_node,
1556 .fnv_last_lock = gbp_endpoint_last_lock_gone,
1557 .fnv_back_walk = gbp_endpoint_back_walk_notify,
1558 // .fnv_mem_show = fib_path_memory_show,
/*
 * Module init. Creates the IP/RD and MAC/BD hash tables (2048 buckets and
 * 1 MiB of memory each), registers the "gbp"/"ep" log class and the
 * endpoint FIB node type, and allocates the high- and low-priority FIB
 * sources used for endpoint routes.
 */
1561 static clib_error_t *
1562 gbp_endpoint_init (vlib_main_t * vm)
1564 #define GBP_EP_HASH_NUM_BUCKETS (2 * 1024)
1565 #define GBP_EP_HASH_MEMORY_SIZE (1 << 20)
1567 clib_bihash_init_24_8 (&gbp_ep_db.ged_by_ip_rd,
1568 "GBP Endpoints - IP/RD",
1569 GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
1571 clib_bihash_init_16_8 (&gbp_ep_db.ged_by_mac_bd,
1572 "GBP Endpoints - MAC/BD",
1573 GBP_EP_HASH_NUM_BUCKETS, GBP_EP_HASH_MEMORY_SIZE);
1575 gbp_ep_logger = vlib_log_register_class ("gbp", "ep");
1576 gbp_endpoint_fib_type = fib_node_register_new_type (&gbp_endpoint_vft);
1577 gbp_fib_source_hi = fib_source_allocate ("gbp-endpoint-hi",
1578 FIB_SOURCE_PRIORITY_HI,
1579 FIB_SOURCE_BH_SIMPLE);
1580 gbp_fib_source_low = fib_source_allocate ("gbp-endpoint-low",
1581 FIB_SOURCE_PRIORITY_LOW,
1582 FIB_SOURCE_BH_SIMPLE);
1587 VLIB_INIT_FUNCTION (gbp_endpoint_init);
1590 * fd.io coding-style-patch-verification: ON
1593 * eval: (c-set-style "gnu")