ip-neighbor: Send API event when neighbor is removed
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
2  * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/fib/fib_table.h>
27 #include <vnet/adj/adj_mcast.h>
28
29 /** Pool for All IP neighbors */
30 static ip_neighbor_t *ip_neighbor_pool;
31
32 /** protocol specific lists of time sorted neighbors */
33 index_t ip_neighbor_list_head[N_AF];
34
35 typedef struct ip_neighbor_elt_t_
36 {
37   clib_llist_anchor_t ipne_anchor;
38   index_t ipne_index;
39 } ip_neighbor_elt_t;
40
41 /** Pool of linked list elemeents */
42 ip_neighbor_elt_t *ip_neighbor_elt_pool;
43
44 typedef struct ip_neighbor_db_t_
45 {
46   /** per interface hash */
47   uword **ipndb_hash;
48   /** per-protocol limit - max number of neighbors*/
49   u32 ipndb_limit;
50   /** max age of a neighbor before it's forcibly evicted */
51   u32 ipndb_age;
52   /** when the limit is reached and new neighbors are created, should
53    * we recycle an old one */
54   bool ipndb_recycle;
55   /** per-protocol number of elements */
56   u32 ipndb_n_elts;
57   /** per-protocol number of elements per-fib-index*/
58   u32 *ipndb_n_elts_per_fib;
59 } ip_neighbor_db_t;
60
61 static vlib_log_class_t ipn_logger;
62
63 /* DBs of neighbours one per AF */
64 /* *INDENT-OFF* */
65 static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
66   [AF_IP4] = {
67     .ipndb_limit = 50000,
68     /* Default to not aging and not recycling */
69     .ipndb_age = 0,
70     .ipndb_recycle = false,
71   },
72   [AF_IP6] = {
73     .ipndb_limit = 50000,
74     /* Default to not aging and not recycling */
75     .ipndb_age = 0,
76     .ipndb_recycle = false,
77   }
78 };
79 /* *INDENT-ON* */
80
/*
 * Logging helpers bound to this module's log class.
 *
 * The expansions deliberately carry no trailing semicolon: the caller
 * supplies it, so that 'IP_NEIGHBOR_DBG (...);' is exactly one statement
 * and remains safe as the body of an un-braced if/else.
 */
#define IP_NEIGHBOR_DBG(...)                           \
    vlib_log_debug (ipn_logger, __VA_ARGS__)

#define IP_NEIGHBOR_INFO(...)                          \
    vlib_log_notice (ipn_logger, __VA_ARGS__)
86
87 ip_neighbor_t *
88 ip_neighbor_get (index_t ipni)
89 {
90   if (pool_is_free_index (ip_neighbor_pool, ipni))
91     return (NULL);
92
93   return (pool_elt_at_index (ip_neighbor_pool, ipni));
94 }
95
96 static index_t
97 ip_neighbor_get_index (const ip_neighbor_t * ipn)
98 {
99   return (ipn - ip_neighbor_pool);
100 }
101
102 static void
103 ip_neighbor_touch (ip_neighbor_t * ipn)
104 {
105   ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_STALE;
106 }
107
108 static bool
109 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
110 {
111   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
112 }
113
114 const ip_address_t *
115 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
116 {
117   return (&ipn->ipn_key->ipnk_ip);
118 }
119
120 ip_address_family_t
121 ip_neighbor_get_af (const ip_neighbor_t * ipn)
122 {
123   return (ip_addr_version (&ipn->ipn_key->ipnk_ip));
124 }
125
126 const mac_address_t *
127 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
128 {
129   return (&ipn->ipn_mac);
130 }
131
132 const u32
133 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
134 {
135   return (ipn->ipn_key->ipnk_sw_if_index);
136 }
137
138 static void
139 ip_neighbor_list_remove (ip_neighbor_t * ipn)
140 {
141   /* new neighbours, are added to the head of the list, since the
142    * list is time sorted, newest first */
143   ip_neighbor_elt_t *elt;
144
145   if (~0 != ipn->ipn_elt)
146     {
147       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
148
149       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
150
151       ipn->ipn_elt = ~0;
152     }
153 }
154
155 static void
156 ip_neighbor_refresh (ip_neighbor_t * ipn)
157 {
158   /* new neighbours, are added to the head of the list, since the
159    * list is time sorted, newest first */
160   ip_neighbor_elt_t *elt, *head;
161
162   ip_neighbor_touch (ipn);
163   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
164   ipn->ipn_n_probes = 0;
165
166   if (ip_neighbor_is_dynamic (ipn))
167     {
168       if (~0 == ipn->ipn_elt)
169         /* first time insertion */
170         pool_get_zero (ip_neighbor_elt_pool, elt);
171       else
172         {
173           /* already inserted - extract first */
174           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
175
176           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
177         }
178       head = pool_elt_at_index (ip_neighbor_elt_pool,
179                                 ip_neighbor_list_head[ip_neighbor_get_af
180                                                       (ipn)]);
181
182       elt->ipne_index = ip_neighbor_get_index (ipn);
183       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
184       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
185     }
186 }
187
188 static void
189 ip_neighbor_db_add (const ip_neighbor_t * ipn)
190 {
191   ip_address_family_t af;
192   u32 sw_if_index;
193
194   af = ip_neighbor_get_af (ipn);
195   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
196
197   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
198
199   if (!ip_neighbor_db[af].ipndb_hash[sw_if_index])
200     ip_neighbor_db[af].ipndb_hash[sw_if_index]
201       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
202
203   hash_set_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index],
204                 ipn->ipn_key, ip_neighbor_get_index (ipn));
205
206   ip_neighbor_db[af].ipndb_n_elts++;
207 }
208
209 static void
210 ip_neighbor_db_remove (const ip_neighbor_t * ipn)
211 {
212   ip_address_family_t af;
213   u32 sw_if_index;
214
215   af = ip_neighbor_get_af (ipn);
216   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
217
218   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
219
220   hash_unset_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index], ipn->ipn_key);
221
222   ip_neighbor_db[af].ipndb_n_elts--;
223 }
224
225 static ip_neighbor_t *
226 ip_neighbor_db_find (const ip_neighbor_key_t * key)
227 {
228   ip_address_family_t af;
229   uword *p;
230
231   af = ip_addr_version (&key->ipnk_ip);
232
233   if (key->ipnk_sw_if_index >= vec_len (ip_neighbor_db[af].ipndb_hash))
234     return NULL;
235
236   p = hash_get_mem (ip_neighbor_db[af].ipndb_hash
237                     [key->ipnk_sw_if_index], key);
238
239   if (p)
240     return ip_neighbor_get (p[0]);
241
242   return (NULL);
243 }
244
245 static u8
246 ip_af_type_pfx_len (ip_address_family_t type)
247 {
248   return (type == AF_IP4 ? 32 : 128);
249 }
250
251 static void
252 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
253 {
254   ip_address_family_t af;
255
256   af = ip_neighbor_get_af (ipn);
257
258   if (af == AF_IP6 &&
259       ip6_address_is_link_local_unicast (&ip_addr_v6
260                                          (&ipn->ipn_key->ipnk_ip)))
261     {
262       ip6_ll_prefix_t pfx = {
263         .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
264         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
265       };
266       ipn->ipn_fib_entry_index =
267         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
268     }
269   else
270     {
271       fib_protocol_t fproto;
272
273       fproto = ip_address_family_to_fib_proto (af);
274
275       fib_prefix_t pfx = {
276         .fp_len = ip_af_type_pfx_len (af),
277         .fp_proto = fproto,
278         .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
279       };
280
281       ipn->ipn_fib_entry_index =
282         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
283                                   FIB_ENTRY_FLAG_ATTACHED,
284                                   fib_proto_to_dpo (fproto),
285                                   &pfx.fp_addr,
286                                   ipn->ipn_key->ipnk_sw_if_index,
287                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
288
289       vec_validate (ip_neighbor_db[af].ipndb_n_elts_per_fib, fib_index);
290
291       ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]++;
292
293       if (1 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
294         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
295     }
296 }
297
298 static void
299 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
300 {
301   ip_address_family_t af;
302
303   af = ip_neighbor_get_af (ipn);
304
305   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
306     {
307       if (AF_IP6 == af &&
308           ip6_address_is_link_local_unicast (&ip_addr_v6
309                                              (&ipn->ipn_key->ipnk_ip)))
310         {
311           ip6_ll_prefix_t pfx = {
312             .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
313             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
314           };
315           ip6_ll_table_entry_delete (&pfx);
316         }
317       else
318         {
319           fib_protocol_t fproto;
320
321           fproto = ip_address_family_to_fib_proto (af);
322
323           fib_prefix_t pfx = {
324             .fp_len = ip_af_type_pfx_len (af),
325             .fp_proto = fproto,
326             .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
327           };
328
329           fib_table_entry_path_remove (fib_index,
330                                        &pfx,
331                                        FIB_SOURCE_ADJ,
332                                        fib_proto_to_dpo (fproto),
333                                        &pfx.fp_addr,
334                                        ipn->ipn_key->ipnk_sw_if_index,
335                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
336
337           ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]--;
338
339           if (0 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
340             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
341         }
342     }
343 }
344
345 static void
346 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
347 {
348   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
349                           ethernet_build_rewrite (vnet_get_main (),
350                                                   ipn->
351                                                   ipn_key->ipnk_sw_if_index,
352                                                   adj_get_link_type (ai),
353                                                   ipn->ipn_mac.bytes));
354 }
355
356 static void
357 ip_neighbor_mk_incomplete (adj_index_t ai)
358 {
359   ip_adjacency_t *adj = adj_get (ai);
360
361   adj_nbr_update_rewrite (ai,
362                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
363                           ethernet_build_rewrite (vnet_get_main (),
364                                                   adj->
365                                                   rewrite_header.sw_if_index,
366                                                   VNET_LINK_ARP,
367                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
368 }
369
370 static adj_walk_rc_t
371 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
372 {
373   ip_neighbor_t *ipn = ctx;
374
375   ip_neighbor_mk_complete (ai, ipn);
376
377   return (ADJ_WALK_RC_CONTINUE);
378 }
379
380 static adj_walk_rc_t
381 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
382 {
383   ip_neighbor_mk_incomplete (ai);
384
385   return (ADJ_WALK_RC_CONTINUE);
386 }
387
388 static void
389 ip_neighbor_destroy (ip_neighbor_t * ipn)
390 {
391   ip_address_family_t af;
392
393   af = ip_neighbor_get_af (ipn);
394
395   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
396                    ip_neighbor_get_index (ipn));
397
398   ip_neighbor_publish (ip_neighbor_get_index (ipn),
399                        IP_NEIGHBOR_EVENT_REMOVED);
400
401   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
402                    ip_address_family_to_fib_proto (af),
403                    &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
404                    ip_neighbor_mk_incomplete_walk, ipn);
405   ip_neighbor_adj_fib_remove
406     (ipn,
407      fib_table_get_index_for_sw_if_index
408      (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
409
410   ip_neighbor_list_remove (ipn);
411   ip_neighbor_db_remove (ipn);
412   clib_mem_free (ipn->ipn_key);
413
414   pool_put (ip_neighbor_pool, ipn);
415 }
416
417 static bool
418 ip_neighbor_force_reuse (ip_address_family_t af)
419 {
420   if (!ip_neighbor_db[af].ipndb_recycle)
421     return false;
422
423   /* pluck the oldest entry, which is the one from the end of the list */
424   ip_neighbor_elt_t *elt, *head;
425
426   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
427
428   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
429     return (false);
430
431   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
432   ip_neighbor_destroy (ip_neighbor_get (elt->ipne_index));
433
434   return (true);
435 }
436
437 static ip_neighbor_t *
438 ip_neighbor_alloc (const ip_neighbor_key_t * key,
439                    const mac_address_t * mac, ip_neighbor_flags_t flags)
440 {
441   ip_address_family_t af;
442   ip_neighbor_t *ipn;
443
444   af = ip_addr_version (&key->ipnk_ip);
445
446   if (ip_neighbor_db[af].ipndb_limit &&
447       (ip_neighbor_db[af].ipndb_n_elts >= ip_neighbor_db[af].ipndb_limit))
448     {
449       if (!ip_neighbor_force_reuse (af))
450         return (NULL);
451     }
452
453   pool_get_zero (ip_neighbor_pool, ipn);
454
455   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
456   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
457
458   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
459   ipn->ipn_flags = flags;
460   ipn->ipn_elt = ~0;
461
462   mac_address_copy (&ipn->ipn_mac, mac);
463
464   ip_neighbor_db_add (ipn);
465
466   /* create the adj-fib. the entry in the FIB table for the peer's interface */
467   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
468     ip_neighbor_adj_fib_add
469       (ipn, fib_table_get_index_for_sw_if_index
470        (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
471
472   return (ipn);
473 }
474
475 int
476 ip_neighbor_add (const ip_address_t * ip,
477                  const mac_address_t * mac,
478                  u32 sw_if_index,
479                  ip_neighbor_flags_t flags, u32 * stats_index)
480 {
481   fib_protocol_t fproto;
482   ip_neighbor_t *ipn;
483
484   /* main thread only */
485   ASSERT (0 == vlib_get_thread_index ());
486
487   fproto = ip_address_family_to_fib_proto (ip_addr_version (ip));
488
489   const ip_neighbor_key_t key = {
490     .ipnk_ip = *ip,
491     .ipnk_sw_if_index = sw_if_index,
492   };
493
494   ipn = ip_neighbor_db_find (&key);
495
496   if (ipn)
497     {
498       IP_NEIGHBOR_DBG ("update: %U, %U",
499                        format_vnet_sw_if_index_name, vnet_get_main (),
500                        sw_if_index, format_ip_address, ip,
501                        format_ip_neighbor_flags, flags, format_mac_address_t,
502                        mac);
503
504       ip_neighbor_touch (ipn);
505
506       /* Refuse to over-write static neighbor entry. */
507       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
508           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
509         {
510           /* if MAC address match, still check to send event */
511           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
512             goto check_customers;
513           return -2;
514         }
515
516       /* A dynamic entry can become static, but not vice-versa.
517        * i.e. since if it was programmed by the CP then it must
518        * be removed by the CP */
519       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
520           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
521         {
522           ip_neighbor_list_remove (ipn);
523           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
524           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
525         }
526
527       /*
528        * prevent a DoS attack from the data-plane that
529        * spams us with no-op updates to the MAC address
530        */
531       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
532         {
533           ip_neighbor_refresh (ipn);
534           goto check_customers;
535         }
536
537       mac_address_copy (&ipn->ipn_mac, mac);
538     }
539   else
540     {
541       IP_NEIGHBOR_INFO ("add: %U, %U",
542                         format_vnet_sw_if_index_name, vnet_get_main (),
543                         sw_if_index, format_ip_address, ip,
544                         format_ip_neighbor_flags, flags, format_mac_address_t,
545                         mac);
546
547       ipn = ip_neighbor_alloc (&key, mac, flags);
548
549       if (NULL == ipn)
550         return VNET_API_ERROR_LIMIT_EXCEEDED;
551     }
552
553   /* Update time stamp and flags. */
554   ip_neighbor_refresh (ipn);
555
556   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
557                    fproto, &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
558                    ip_neighbor_mk_complete_walk, ipn);
559
560 check_customers:
561   /* Customer(s) requesting event for this address? */
562   ip_neighbor_publish (ip_neighbor_get_index (ipn), IP_NEIGHBOR_EVENT_ADDED);
563
564   if (stats_index)
565     *stats_index = adj_nbr_find (fproto,
566                                  fib_proto_to_link (fproto),
567                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
568                                  ipn->ipn_key->ipnk_sw_if_index);
569   return 0;
570 }
571
572 int
573 ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index)
574 {
575   ip_neighbor_t *ipn;
576
577   /* main thread only */
578   ASSERT (0 == vlib_get_thread_index ());
579
580   IP_NEIGHBOR_INFO ("delete: %U, %U",
581                     format_vnet_sw_if_index_name, vnet_get_main (),
582                     sw_if_index, format_ip_address, ip);
583
584   const ip_neighbor_key_t key = {
585     .ipnk_ip = *ip,
586     .ipnk_sw_if_index = sw_if_index,
587   };
588
589   ipn = ip_neighbor_db_find (&key);
590
591   if (NULL == ipn)
592     return (VNET_API_ERROR_NO_SUCH_ENTRY);
593
594   ip_neighbor_destroy (ipn);
595
596   return (0);
597 }
598
599 typedef struct ip_neighbor_del_all_ctx_t_
600 {
601   index_t *ipn_del;
602 } ip_neighbor_del_all_ctx_t;
603
604 static walk_rc_t
605 ip_neighbor_del_all_walk_cb (index_t ipni, void *arg)
606 {
607   ip_neighbor_del_all_ctx_t *ctx = arg;
608
609   vec_add1 (ctx->ipn_del, ipni);
610
611   return (WALK_CONTINUE);
612 }
613
614 void
615 ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index)
616 {
617   IP_NEIGHBOR_INFO ("delete-all: %U, %U",
618                     format_ip_address_family, af,
619                     format_vnet_sw_if_index_name, vnet_get_main (),
620                     sw_if_index);
621
622   ip_neighbor_del_all_ctx_t ctx = {
623     .ipn_del = NULL,
624   };
625   index_t *ipni;
626
627   ip_neighbor_walk (af, sw_if_index, ip_neighbor_del_all_walk_cb, &ctx);
628
629   vec_foreach (ipni,
630                ctx.ipn_del) ip_neighbor_destroy (ip_neighbor_get (*ipni));
631   vec_free (ctx.ipn_del);
632 }
633
634 void
635 ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
636 {
637   ip_neighbor_t *ipn;
638   ip_adjacency_t *adj;
639
640   adj = adj_get (ai);
641
642   ip_neighbor_key_t key = {
643     .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
644   };
645
646   ip_address_from_46 (&adj->sub_type.nbr.next_hop,
647                       adj->ia_nh_proto, &key.ipnk_ip);
648
649   ipn = ip_neighbor_db_find (&key);
650
651   switch (adj->lookup_next_index)
652     {
653     case IP_LOOKUP_NEXT_ARP:
654       if (NULL != ipn)
655         {
656           adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
657                            adj->ia_nh_proto,
658                            &adj->sub_type.nbr.next_hop,
659                            ip_neighbor_mk_complete_walk, ipn);
660         }
661       else
662         {
663           /*
664            * no matching ARP entry.
665            * construct the rewrite required to for an ARP packet, and stick
666            * that in the adj's pipe to smoke.
667            */
668           adj_nbr_update_rewrite
669             (ai,
670              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
671              ethernet_build_rewrite
672              (vnm,
673               adj->rewrite_header.sw_if_index,
674               VNET_LINK_ARP,
675               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
676
677           /*
678            * since the FIB has added this adj for a route, it makes sense it
679            * may want to forward traffic sometime soon. Let's send a
680            * speculative ARP. just one. If we were to do periodically that
681            * wouldn't be bad either, but that's more code than i'm prepared to
682            * write at this time for relatively little reward.
683            */
684           /*
685            * adj_nbr_update_rewrite may actually call fib_walk_sync.
686            * fib_walk_sync may allocate a new adjacency and potentially cause
687            * a realloc for adj_pool. When that happens, adj pointer is no
688            * longer valid here.x We refresh adj pointer accordingly.
689            */
690           adj = adj_get (ai);
691           ip_neighbor_probe (adj);
692         }
693       break;
694     case IP_LOOKUP_NEXT_GLEAN:
695     case IP_LOOKUP_NEXT_BCAST:
696     case IP_LOOKUP_NEXT_MCAST:
697     case IP_LOOKUP_NEXT_DROP:
698     case IP_LOOKUP_NEXT_PUNT:
699     case IP_LOOKUP_NEXT_LOCAL:
700     case IP_LOOKUP_NEXT_REWRITE:
701     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
702     case IP_LOOKUP_NEXT_MIDCHAIN:
703     case IP_LOOKUP_NEXT_ICMP_ERROR:
704     case IP_LOOKUP_N_NEXT:
705       ASSERT (0);
706       break;
707     }
708 }
709
710 void
711 ip_neighbor_learn (const ip_neighbor_learn_t * l)
712 {
713   ip_neighbor_add (&l->ip, &l->mac, l->sw_if_index,
714                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
715 }
716
717 static clib_error_t *
718 ip_neighbor_cmd (vlib_main_t * vm,
719                  unformat_input_t * input, vlib_cli_command_t * cmd)
720 {
721   ip_address_t ip = IP_ADDRESS_V6_ALL_0S;
722   mac_address_t mac = ZERO_MAC_ADDRESS;
723   vnet_main_t *vnm = vnet_get_main ();
724   ip_neighbor_flags_t flags;
725   u32 sw_if_index = ~0;
726   int is_add = 1;
727   int count = 1;
728
729   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
730
731   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
732     {
733       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
734       if (unformat (input, "%U %U %U",
735                     unformat_vnet_sw_interface, vnm, &sw_if_index,
736                     unformat_ip_address, &ip, unformat_mac_address_t, &mac))
737         ;
738       else if (unformat (input, "delete") || unformat (input, "del"))
739         is_add = 0;
740       else if (unformat (input, "static"))
741         {
742           flags |= IP_NEIGHBOR_FLAG_STATIC;
743           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
744         }
745       else if (unformat (input, "no-fib-entry"))
746         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
747       else if (unformat (input, "count %d", &count))
748         ;
749       else
750         break;
751     }
752
753   if (sw_if_index == ~0 ||
754       ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
755     return clib_error_return (0,
756                               "specify interface, IP address and MAC: `%U'",
757                               format_unformat_error, input);
758
759   while (count)
760     {
761       if (is_add)
762         ip_neighbor_add (&ip, &mac, sw_if_index, flags, NULL);
763       else
764         ip_neighbor_del (&ip, sw_if_index);
765
766       ip_address_increment (&ip);
767       mac_address_increment (&mac);
768
769       --count;
770     }
771
772   return NULL;
773 }
774
775 /* *INDENT-OFF* */
776 /*?
777  * Add or delete IPv4 ARP cache entries.
778  *
779  * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
780  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
781  * any order and combination.
782  *
783  * @cliexpar
784  * @parblock
785  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
786  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
787  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
788  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
789  *
790  * To add or delete an IPv4 ARP cache entry to or from a specific fib
791  * table:
792  * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
793  * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
794  *
795  * Add or delete IPv4 static ARP cache entries as follows:
796  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
797  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
798  *
799  * For testing / debugging purposes, the 'set ip neighbor' command can add or
800  * delete multiple entries. Supply the 'count N' parameter:
801  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
802  * @endparblock
803  ?*/
804 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
805   .path = "set ip neighbor",
806   .short_help =
807   "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
808   .function = ip_neighbor_cmd,
809 };
810 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
811   .path = "ip neighbor",
812   .short_help =
813   "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
814   .function = ip_neighbor_cmd,
815 };
816 /* *INDENT-ON* */
817
818 static int
819 ip_neighbor_sort (void *a1, void *a2)
820 {
821   index_t *ipni1 = a1, *ipni2 = a2;
822   ip_neighbor_t *ipn1, *ipn2;
823   int cmp;
824
825   ipn1 = ip_neighbor_get (*ipni1);
826   ipn2 = ip_neighbor_get (*ipni2);
827
828   cmp = vnet_sw_interface_compare (vnet_get_main (),
829                                    ipn1->ipn_key->ipnk_sw_if_index,
830                                    ipn2->ipn_key->ipnk_sw_if_index);
831   if (!cmp)
832     cmp = ip_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
833   return cmp;
834 }
835
836 static index_t *
837 ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
838 {
839   index_t *ipnis = NULL;
840   ip_neighbor_t *ipn;
841
842   /* *INDENT-OFF* */
843   pool_foreach (ipn, ip_neighbor_pool,
844   ({
845     if ((sw_if_index == ~0 ||
846         ipn->ipn_key->ipnk_sw_if_index == sw_if_index) &&
847         (N_AF == af ||
848          ip_neighbor_get_af(ipn) == af))
849        vec_add1 (ipnis, ip_neighbor_get_index(ipn));
850   }));
851
852   /* *INDENT-ON* */
853
854   if (ipnis)
855     vec_sort_with_function (ipnis, ip_neighbor_sort);
856   return ipnis;
857 }
858
859 static clib_error_t *
860 ip_neighbor_show_sorted_i (vlib_main_t * vm,
861                            unformat_input_t * input,
862                            vlib_cli_command_t * cmd, ip_address_family_t af)
863 {
864   ip_neighbor_elt_t *elt, *head;
865
866   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
867
868
869   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
870                    "Flags", "Ethernet", "Interface");
871
872   /* *INDENT-OFF*/
873   /* the list is time sorted, newest first, so start from the back
874    * and work forwards. Stop when we get to one that is alive */
875   clib_llist_foreach_reverse(ip_neighbor_elt_pool,
876                              ipne_anchor, head, elt,
877   ({
878     vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
879   }));
880   /* *INDENT-ON*/
881
882   return (NULL);
883 }
884
885 static clib_error_t *
886 ip_neighbor_show_i (vlib_main_t * vm,
887                     unformat_input_t * input,
888                     vlib_cli_command_t * cmd, ip_address_family_t af)
889 {
890   index_t *ipni, *ipnis = NULL;
891   u32 sw_if_index;
892
893   /* Filter entries by interface if given. */
894   sw_if_index = ~0;
895   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
896                         &sw_if_index);
897
898   ipnis = ip_neighbor_entries (sw_if_index, af);
899
900   if (ipnis)
901     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
902                      "Flags", "Ethernet", "Interface");
903
904   vec_foreach (ipni, ipnis)
905   {
906     vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
907   }
908   vec_free (ipnis);
909
910   return (NULL);
911 }
912
913 static clib_error_t *
914 ip_neighbor_show (vlib_main_t * vm,
915                   unformat_input_t * input, vlib_cli_command_t * cmd)
916 {
917   return (ip_neighbor_show_i (vm, input, cmd, N_AF));
918 }
919
920 static clib_error_t *
921 ip6_neighbor_show (vlib_main_t * vm,
922                    unformat_input_t * input, vlib_cli_command_t * cmd)
923 {
924   return (ip_neighbor_show_i (vm, input, cmd, AF_IP6));
925 }
926
927 static clib_error_t *
928 ip4_neighbor_show (vlib_main_t * vm,
929                    unformat_input_t * input, vlib_cli_command_t * cmd)
930 {
931   return (ip_neighbor_show_i (vm, input, cmd, AF_IP4));
932 }
933
934 static clib_error_t *
935 ip6_neighbor_show_sorted (vlib_main_t * vm,
936                           unformat_input_t * input, vlib_cli_command_t * cmd)
937 {
938   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP6));
939 }
940
941 static clib_error_t *
942 ip4_neighbor_show_sorted (vlib_main_t * vm,
943                           unformat_input_t * input, vlib_cli_command_t * cmd)
944 {
945   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP4));
946 }
947
948 /*?
949  * Display all the IP neighbor entries.
950  *
951  * @cliexpar
952  * Example of how to display the IPv4 ARP table:
953  * @cliexstart{show ip neighbor}
954  *    Time      FIB        IP4       Flags      Ethernet              Interface
955  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
956  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
957  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
958  * Proxy arps enabled for:
959  * Fib_index 0   6.0.0.1 - 6.0.0.11
960  * @cliexend
961  ?*/
962 /* *INDENT-OFF* */
963 VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
964   .path = "show ip neighbors",
965   .function = ip_neighbor_show,
966   .short_help = "show ip neighbors [interface]",
967 };
968 VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
969   .path = "show ip4 neighbors",
970   .function = ip4_neighbor_show,
971   .short_help = "show ip4 neighbors [interface]",
972 };
973 VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
974   .path = "show ip6 neighbors",
975   .function = ip6_neighbor_show,
976   .short_help = "show ip6 neighbors [interface]",
977 };
978 VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
979   .path = "show ip neighbor",
980   .function = ip_neighbor_show,
981   .short_help = "show ip neighbor [interface]",
982 };
983 VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
984   .path = "show ip4 neighbor",
985   .function = ip4_neighbor_show,
986   .short_help = "show ip4 neighbor [interface]",
987 };
988 VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
989   .path = "show ip6 neighbor",
990   .function = ip6_neighbor_show,
991   .short_help = "show ip6 neighbor [interface]",
992 };
993 VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
994   .path = "show ip4 neighbor-sorted",
995   .function = ip4_neighbor_show_sorted,
996   .short_help = "show ip4 neighbor-sorted",
997 };
998 VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
999   .path = "show ip6 neighbor-sorted",
1000   .function = ip6_neighbor_show_sorted,
1001   .short_help = "show ip6 neighbor-sorted",
1002 };
1003 /* *INDENT-ON* */
1004
1005 static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
1006
1007 void
1008 ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
1009 {
1010   ip_nbr_vfts[af] = *vft;
1011 }
1012
1013 void
1014 ip_neighbor_probe_dst (const ip_adjacency_t * adj, const ip46_address_t * dst)
1015 {
1016   if (!vnet_sw_interface_is_admin_up (vnet_get_main (),
1017                                       adj->rewrite_header.sw_if_index))
1018     return;
1019
1020   switch (adj->ia_nh_proto)
1021     {
1022     case FIB_PROTOCOL_IP6:
1023       ip6_neighbor_probe_dst (adj, &dst->ip6);
1024       break;
1025     case FIB_PROTOCOL_IP4:
1026       ip4_neighbor_probe_dst (adj, &dst->ip4);
1027       break;
1028     case FIB_PROTOCOL_MPLS:
1029       ASSERT (0);
1030       break;
1031     }
1032 }
1033
1034 void
1035 ip_neighbor_probe (const ip_adjacency_t * adj)
1036 {
1037   ip_neighbor_probe_dst (adj, &adj->sub_type.nbr.next_hop);
1038 }
1039
1040 void
1041 ip_neighbor_walk (ip_address_family_t af,
1042                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
1043 {
1044   ip_neighbor_key_t *key;
1045   index_t ipni;
1046
1047   if (~0 == sw_if_index)
1048     {
1049       uword **hash;
1050
1051       vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
1052       {
1053           /* *INDENT-OFF* */
1054           hash_foreach (key, ipni, *hash,
1055           ({
1056             if (WALK_STOP == cb (ipni, ctx))
1057               break;
1058           }));
1059           /* *INDENT-ON* */
1060       }
1061     }
1062   else
1063     {
1064       uword *hash;
1065
1066       if (vec_len (ip_neighbor_db[af].ipndb_hash) <= sw_if_index)
1067         return;
1068       hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
1069
1070       /* *INDENT-OFF* */
1071       hash_foreach (key, ipni, hash,
1072       ({
1073         if (WALK_STOP == cb (ipni, ctx))
1074           break;
1075       }));
1076       /* *INDENT-ON* */
1077     }
1078 }
1079
1080 int
1081 ip4_neighbor_proxy_add (u32 fib_index,
1082                         const ip4_address_t * start,
1083                         const ip4_address_t * end)
1084 {
1085   if (ip_nbr_vfts[AF_IP4].inv_proxy4_add)
1086     {
1087       return (ip_nbr_vfts[AF_IP4].inv_proxy4_add (fib_index, start, end));
1088     }
1089
1090   return (-1);
1091 }
1092
1093 int
1094 ip4_neighbor_proxy_delete (u32 fib_index,
1095                            const ip4_address_t * start,
1096                            const ip4_address_t * end)
1097 {
1098   if (ip_nbr_vfts[AF_IP4].inv_proxy4_del)
1099     {
1100       return (ip_nbr_vfts[AF_IP4].inv_proxy4_del (fib_index, start, end));
1101     }
1102   return -1;
1103 }
1104
1105 int
1106 ip4_neighbor_proxy_enable (u32 sw_if_index)
1107 {
1108   if (ip_nbr_vfts[AF_IP4].inv_proxy4_enable)
1109     {
1110       return (ip_nbr_vfts[AF_IP4].inv_proxy4_enable (sw_if_index));
1111     }
1112   return -1;
1113 }
1114
1115 int
1116 ip4_neighbor_proxy_disable (u32 sw_if_index)
1117 {
1118   if (ip_nbr_vfts[AF_IP4].inv_proxy4_disable)
1119     {
1120       return (ip_nbr_vfts[AF_IP4].inv_proxy4_disable (sw_if_index));
1121     }
1122   return -1;
1123 }
1124
1125 int
1126 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1127 {
1128   if (ip_nbr_vfts[AF_IP6].inv_proxy6_add)
1129     {
1130       return (ip_nbr_vfts[AF_IP6].inv_proxy6_add (sw_if_index, addr));
1131     }
1132   return -1;
1133 }
1134
1135 int
1136 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1137 {
1138   if (ip_nbr_vfts[AF_IP6].inv_proxy6_del)
1139     {
1140       return (ip_nbr_vfts[AF_IP6].inv_proxy6_del (sw_if_index, addr));
1141     }
1142   return -1;
1143 }
1144
1145 static void
1146 ip_neighbor_ethernet_change_mac (ethernet_main_t * em,
1147                                  u32 sw_if_index, uword opaque)
1148 {
1149   ip_neighbor_t *ipn;
1150   adj_index_t ai;
1151
1152   IP_NEIGHBOR_DBG ("mac-change: %U",
1153                    format_vnet_sw_if_index_name, vnet_get_main (),
1154                    sw_if_index);
1155
1156   /* *INDENT-OFF* */
1157   pool_foreach (ipn, ip_neighbor_pool,
1158   ({
1159     if (ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1160       adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1161                        ip_address_family_to_fib_proto(ip_neighbor_get_af(ipn)),
1162                        &ip_addr_46(&ipn->ipn_key->ipnk_ip),
1163                        ip_neighbor_mk_complete_walk,
1164                        ipn);
1165   }));
1166   /* *INDENT-ON* */
1167
1168   ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index);
1169
1170   if (ADJ_INDEX_INVALID != ai)
1171     adj_glean_update_rewrite (ai);
1172 }
1173
1174 void
1175 ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
1176 {
1177   index_t *ipnis = NULL, *ipni;
1178   ip_neighbor_t *ipn;
1179
1180   IP_NEIGHBOR_DBG ("populate: %U %U",
1181                    format_vnet_sw_if_index_name, vnet_get_main (),
1182                    sw_if_index, format_ip_address_family, af);
1183
1184   /* *INDENT-OFF* */
1185   pool_foreach (ipn, ip_neighbor_pool,
1186   ({
1187     if (ip_neighbor_get_af(ipn) == af &&
1188         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1189       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1190   }));
1191   /* *INDENT-ON* */
1192
1193   vec_foreach (ipni, ipnis)
1194   {
1195     ipn = ip_neighbor_get (*ipni);
1196
1197     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1198                      ip_address_family_to_fib_proto (ip_neighbor_get_af
1199                                                      (ipn)),
1200                      &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
1201                      ip_neighbor_mk_complete_walk, ipn);
1202   }
1203   vec_free (ipnis);
1204 }
1205
1206 void
1207 ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
1208 {
1209   index_t *ipnis = NULL, *ipni;
1210   ip_neighbor_t *ipn;
1211
1212
1213   IP_NEIGHBOR_DBG ("flush: %U %U",
1214                    format_vnet_sw_if_index_name, vnet_get_main (),
1215                    sw_if_index, format_ip_address_family, af);
1216
1217   /* *INDENT-OFF* */
1218   pool_foreach (ipn, ip_neighbor_pool,
1219   ({
1220     if (ip_neighbor_get_af(ipn) == af &&
1221         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1222         ip_neighbor_is_dynamic (ipn))
1223       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1224   }));
1225   /* *INDENT-ON* */
1226
1227   vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
1228   vec_free (ipnis);
1229 }
1230
1231 static walk_rc_t
1232 ip_neighbor_mark_one (index_t ipni, void *ctx)
1233 {
1234   ip_neighbor_t *ipn;
1235
1236   ipn = ip_neighbor_get (ipni);
1237
1238   ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STALE;
1239
1240   return (WALK_CONTINUE);
1241 }
1242
1243 void
1244 ip_neighbor_mark (ip_address_family_t af)
1245 {
1246   ip_neighbor_walk (af, ~0, ip_neighbor_mark_one, NULL);
1247 }
1248
1249 typedef struct ip_neighbor_sweep_ctx_t_
1250 {
1251   index_t *ipnsc_stale;
1252 } ip_neighbor_sweep_ctx_t;
1253
1254 static walk_rc_t
1255 ip_neighbor_sweep_one (index_t ipni, void *arg)
1256 {
1257   ip_neighbor_sweep_ctx_t *ctx = arg;
1258   ip_neighbor_t *ipn;
1259
1260   ipn = ip_neighbor_get (ipni);
1261
1262   if (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STALE)
1263     {
1264       vec_add1 (ctx->ipnsc_stale, ipni);
1265     }
1266
1267   return (WALK_CONTINUE);
1268 }
1269
1270 void
1271 ip_neighbor_sweep (ip_address_family_t af)
1272 {
1273   ip_neighbor_sweep_ctx_t ctx = { };
1274   index_t *ipni;
1275
1276   ip_neighbor_walk (af, ~0, ip_neighbor_sweep_one, &ctx);
1277
1278   vec_foreach (ipni, ctx.ipnsc_stale)
1279   {
1280     ip_neighbor_destroy (ip_neighbor_get (*ipni));
1281   }
1282   vec_free (ctx.ipnsc_stale);
1283 }
1284
1285 /*
1286  * Remove any arp entries associated with the specified interface
1287  */
1288 static clib_error_t *
1289 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1290                                     u32 sw_if_index, u32 flags)
1291 {
1292   ip_address_family_t af;
1293
1294   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1295                    format_vnet_sw_if_index_name, vnet_get_main (),
1296                    sw_if_index,
1297                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1298
1299   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1300     {
1301       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_populate (af, sw_if_index);
1302     }
1303   else
1304     {
1305       /* admin down, flush all neighbours */
1306       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1307     }
1308
1309   return (NULL);
1310 }
1311
1312 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1313
1314 /*
1315  * Remove any arp entries associated with the specified interface
1316  */
1317 static clib_error_t *
1318 ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
1319                                  u32 sw_if_index, u32 is_add)
1320 {
1321   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1322                    format_vnet_sw_if_index_name, vnet_get_main (),
1323                    sw_if_index, (is_add ? "add" : "del"));
1324
1325   if (!is_add && sw_if_index != ~0)
1326     {
1327       ip_address_family_t af;
1328
1329       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1330     }
1331
1332   return (NULL);
1333 }
1334
1335 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
1336
1337 typedef struct ip_neighbor_walk_covered_ctx_t_
1338 {
1339   ip_address_t addr;
1340   u32 length;
1341   index_t *ipnis;
1342 } ip_neighbor_walk_covered_ctx_t;
1343
1344 static walk_rc_t
1345 ip_neighbor_walk_covered (index_t ipni, void *arg)
1346 {
1347   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1348   ip_neighbor_t *ipn;
1349
1350   ipn = ip_neighbor_get (ipni);
1351
1352   if (AF_IP4 == ip_addr_version (&ctx->addr))
1353     {
1354       if (ip4_destination_matches_route (&ip4_main,
1355                                          &ip_addr_v4 (&ipn->ipn_key->ipnk_ip),
1356                                          &ip_addr_v4 (&ctx->addr),
1357                                          ctx->length) &&
1358           ip_neighbor_is_dynamic (ipn))
1359         {
1360           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1361         }
1362     }
1363   else if (AF_IP6 == ip_addr_version (&ctx->addr))
1364     {
1365       if (ip6_destination_matches_route (&ip6_main,
1366                                          &ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
1367                                          &ip_addr_v6 (&ctx->addr),
1368                                          ctx->length) &&
1369           ip_neighbor_is_dynamic (ipn))
1370         {
1371           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1372         }
1373     }
1374   return (WALK_CONTINUE);
1375 }
1376
1377
1378 /*
1379  * callback when an interface address is added or deleted
1380  */
1381 static void
1382 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1383                                           uword opaque,
1384                                           u32 sw_if_index,
1385                                           ip4_address_t * address,
1386                                           u32 address_length,
1387                                           u32 if_address_index, u32 is_del)
1388 {
1389   /*
1390    * Flush the ARP cache of all entries covered by the address
1391    * that is being removed.
1392    */
1393   IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
1394                    (is_del ? "del" : "add"),
1395                    format_vnet_sw_if_index_name, vnet_get_main (),
1396                    sw_if_index, format_ip4_address, address, address_length);
1397
1398   if (is_del)
1399     {
1400       /* *INDENT-OFF* */
1401       ip_neighbor_walk_covered_ctx_t ctx = {
1402         .addr = {
1403           .ip.ip4 = *address,
1404           .version = AF_IP4,
1405         },
1406         .length = address_length,
1407       };
1408       /* *INDENT-ON* */
1409       index_t *ipni;
1410
1411       ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
1412
1413       vec_foreach (ipni, ctx.ipnis)
1414         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1415
1416       vec_free (ctx.ipnis);
1417     }
1418 }
1419
1420 /*
1421  * callback when an interface address is added or deleted
1422  */
1423 static void
1424 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1425                                           uword opaque,
1426                                           u32 sw_if_index,
1427                                           ip6_address_t * address,
1428                                           u32 address_length,
1429                                           u32 if_address_index, u32 is_del)
1430 {
1431   /*
1432    * Flush the ARP cache of all entries covered by the address
1433    * that is being removed.
1434    */
1435   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1436                    format_vnet_sw_if_index_name, vnet_get_main (),
1437                    sw_if_index, format_ip6_address, address, address_length,
1438                    (is_del ? "del" : "add"));
1439
1440   if (is_del)
1441     {
1442       /* *INDENT-OFF* */
1443       ip_neighbor_walk_covered_ctx_t ctx = {
1444         .addr = {
1445           .ip.ip6 = *address,
1446           .version = AF_IP6,
1447         },
1448         .length = address_length,
1449       };
1450       /* *INDENT-ON* */
1451       index_t *ipni;
1452
1453       ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
1454
1455       vec_foreach (ipni, ctx.ipnis)
1456         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1457
1458       vec_free (ctx.ipnis);
1459     }
1460 }
1461
1462 typedef struct ip_neighbor_table_bind_ctx_t_
1463 {
1464   u32 new_fib_index;
1465   u32 old_fib_index;
1466 } ip_neighbor_table_bind_ctx_t;
1467
1468 static walk_rc_t
1469 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1470 {
1471   ip_neighbor_table_bind_ctx_t *ctx = arg;
1472   ip_neighbor_t *ipn;
1473
1474   ipn = ip_neighbor_get (ipni);
1475   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1476   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1477
1478   return (WALK_CONTINUE);
1479 }
1480
1481 static void
1482 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1483                            uword opaque,
1484                            u32 sw_if_index,
1485                            u32 new_fib_index, u32 old_fib_index)
1486 {
1487   ip_neighbor_table_bind_ctx_t ctx = {
1488     .old_fib_index = old_fib_index,
1489     .new_fib_index = new_fib_index,
1490   };
1491
1492   ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1493 }
1494
1495 static void
1496 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1497                            uword opaque,
1498                            u32 sw_if_index,
1499                            u32 new_fib_index, u32 old_fib_index)
1500 {
1501   ip_neighbor_table_bind_ctx_t ctx = {
1502     .old_fib_index = old_fib_index,
1503     .new_fib_index = new_fib_index,
1504   };
1505
1506   ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1507 }
1508
/** Verdict of an aging check on one neighbor */
typedef enum ip_neighbor_age_state_t_
{
  /** the neighbor has not exceeded the configured age */
  IP_NEIGHBOR_AGE_ALIVE = 0,
  /** the neighbor exceeded the age and is still being probed */
  IP_NEIGHBOR_AGE_PROBE = 1,
  /** the neighbor exceeded the age and its probe budget is used up */
  IP_NEIGHBOR_AGE_DEAD = 2,
} ip_neighbor_age_state_t;

/** Timeout value that makes the aging process wait for events only */
#define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1517
1518 static ip_neighbor_age_state_t
1519 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1520 {
1521   ip_address_family_t af;
1522   ip_neighbor_t *ipn;
1523   u32 ipndb_age;
1524   u32 ttl;
1525
1526   ipn = ip_neighbor_get (ipni);
1527   af = ip_neighbor_get_af (ipn);
1528   ipndb_age = ip_neighbor_db[af].ipndb_age;
1529   ttl = now - ipn->ipn_time_last_updated;
1530   *wait = ipndb_age;
1531
1532   if (ttl > ipndb_age)
1533     {
1534       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
1535                        format_ip_neighbor, ipni, now,
1536                        ipn->ipn_time_last_updated, ipndb_age);
1537       if (ipn->ipn_n_probes > 2)
1538         {
1539           /* 3 strikes and yea-re out */
1540           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
1541           *wait = 1;
1542           return (IP_NEIGHBOR_AGE_DEAD);
1543         }
1544       else
1545         {
1546           adj_index_t ai;
1547
1548           ai = adj_glean_get (ip_address_family_to_fib_proto (af),
1549                               ip_neighbor_get_sw_if_index (ipn));
1550
1551           if (ADJ_INDEX_INVALID != ai)
1552             ip_neighbor_probe_dst (adj_get (ai),
1553                                    &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
1554
1555           ipn->ipn_n_probes++;
1556           *wait = 1;
1557         }
1558     }
1559   else
1560     {
1561       /* here we are sure that ttl <= ipndb_age */
1562       *wait = ipndb_age - ttl + 1;
1563       return (IP_NEIGHBOR_AGE_ALIVE);
1564     }
1565
1566   return (IP_NEIGHBOR_AGE_PROBE);
1567 }
1568
/** Events that can be signalled to the neighbor aging processes */
typedef enum ip_neighbor_process_event_t_
{
  /** wake the process so that it re-reads the aging configuration */
  IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
} ip_neighbor_process_event_t;

/**
 * Body of the neighbor aging process for one address family.
 *
 * The loop sleeps until an event arrives or, when a non-zero timeout is
 * set, until that timeout elapses. On timer expiry the family's
 * time-sorted neighbor list is walked from its tail: dead neighbors are
 * destroyed and the next timeout becomes the smallest wait reported by
 * ip_neighbour_age_out. On a WAKEUP event the timeout is recomputed from
 * the current configuration.
 *
 * @param vm vlib main, used for waiting, event retrieval and time
 * @param rt unused
 * @param f  unused
 * @param af address family this instance ages
 * @return 0; not reached, the loop has no exit
 */
static uword
ip_neighbor_age_loop (vlib_main_t * vm,
                      vlib_node_runtime_t * rt,
                      vlib_frame_t * f, ip_address_family_t af)
{
  uword event_type, *event_data = NULL;
  f64 timeout;

  /* Start with no timer: a zero timeout makes the loop below wait for an
   * event only */
  timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;

  while (1)
    {
      f64 now;

      if (!timeout)
        vlib_process_wait_for_event (vm);
      else
        vlib_process_wait_for_event_or_clock (vm, timeout);

      /* only the event type matters, any event data is discarded */
      event_type = vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      now = vlib_time_now (vm);

      switch (event_type)
        {
        case ~0:
          {
            /* timer expired */
            ip_neighbor_elt_t *elt, *head;
            f64 wait;

            /* upper bound for the next sleep; lowered by the walk below */
            timeout = ip_neighbor_db[af].ipndb_age;
            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[af]);

          /* *INDENT-OFF*/
          /* the list is time sorted, newest first, so start from the back
           * and work forwards. Stop when we get to one that is alive */
          restart:
          clib_llist_foreach_reverse(ip_neighbor_elt_pool,
                                     ipne_anchor, head, elt,
          ({
            ip_neighbor_age_state_t res;

            res = ip_neighbour_age_out(elt->ipne_index, now, &wait);

            if (IP_NEIGHBOR_AGE_ALIVE == res) {
              /* the oldest neighbor has not yet expired, go back to sleep */
              timeout = clib_min (wait, timeout);
              break;
            }
            else if (IP_NEIGHBOR_AGE_DEAD == res) {
              /* the oldest neighbor is dead, pop it, then restart the walk
               * again from the back */
              ip_neighbor_destroy (ip_neighbor_get(elt->ipne_index));
              goto restart;
            }

            /* the neighbor was probed; keep walking towards newer ones */
            timeout = clib_min (wait, timeout);
          }));
          /* *INDENT-ON* */
            break;
          }
        case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
          {

            if (!ip_neighbor_db[af].ipndb_age)
              {
                /* aging has been disabled */
                timeout = 0;
                break;
              }
            ip_neighbor_elt_t *elt, *head;

            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[af]);
            /* no neighbors yet */
            if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
              {
                timeout = ip_neighbor_db[af].ipndb_age;
                break;
              }

            /* poke the oldest neighbour for aging, which returns how long we sleep for */
            elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
            ip_neighbour_age_out (elt->ipne_index, now, &timeout);
            break;
          }
        }
    }
  return 0;
}
1668
1669 static uword
1670 ip4_neighbor_age_process (vlib_main_t * vm,
1671                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1672 {
1673   return (ip_neighbor_age_loop (vm, rt, f, AF_IP4));
1674 }
1675
1676 static uword
1677 ip6_neighbor_age_process (vlib_main_t * vm,
1678                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1679 {
1680   return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
1681 }
1682
1683 /* *INDENT-OFF* */
1684 VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
1685   .function = ip4_neighbor_age_process,
1686   .type = VLIB_NODE_TYPE_PROCESS,
1687   .name = "ip4-neighbor-age-process",
1688 };
1689 VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
1690   .function = ip6_neighbor_age_process,
1691   .type = VLIB_NODE_TYPE_PROCESS,
1692   .name = "ip6-neighbor-age-process",
1693 };
1694 /* *INDENT-ON* */
1695
1696 int
1697 ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
1698 {
1699   ip_neighbor_db[af].ipndb_limit = limit;
1700   ip_neighbor_db[af].ipndb_recycle = recycle;
1701   ip_neighbor_db[af].ipndb_age = age;
1702
1703   vlib_process_signal_event (vlib_get_main (),
1704                              (AF_IP4 == af ?
1705                               ip4_neighbor_age_process_node.index :
1706                               ip6_neighbor_age_process_node.index),
1707                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1708
1709   return (0);
1710 }
1711
1712 static clib_error_t *
1713 ip_neighbor_config_show (vlib_main_t * vm,
1714                          unformat_input_t * input, vlib_cli_command_t * cmd)
1715 {
1716   ip_address_family_t af;
1717
1718   /* *INDENT-OFF* */
1719   FOR_EACH_IP_ADDRESS_FAMILY(af) {
1720     vlib_cli_output (vm, "%U:", format_ip_address_family, af);
1721     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1722                      ip_neighbor_db[af].ipndb_limit,
1723                      ip_neighbor_db[af].ipndb_age,
1724                      ip_neighbor_db[af].ipndb_recycle);
1725   }
1726
1727   /* *INDENT-ON* */
1728   return (NULL);
1729 }
1730
1731 /* *INDENT-OFF* */
1732 VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
1733   .path = "show ip neighbor-config",
1734   .function = ip_neighbor_config_show,
1735   .short_help = "show ip neighbor-config",
1736 };
1737 /* *INDENT-ON* */
1738
1739 static clib_error_t *
1740 ip_neighbor_init (vlib_main_t * vm)
1741 {
1742   {
1743     ip4_add_del_interface_address_callback_t cb = {
1744       .function = ip_neighbor_add_del_interface_address_v4,
1745     };
1746     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1747   }
1748   {
1749     ip6_add_del_interface_address_callback_t cb = {
1750       .function = ip_neighbor_add_del_interface_address_v6,
1751     };
1752     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1753   }
1754   {
1755     ip4_table_bind_callback_t cb = {
1756       .function = ip_neighbor_table_bind_v4,
1757     };
1758     vec_add1 (ip4_main.table_bind_callbacks, cb);
1759   }
1760   {
1761     ip6_table_bind_callback_t cb = {
1762       .function = ip_neighbor_table_bind_v6,
1763     };
1764     vec_add1 (ip6_main.table_bind_callbacks, cb);
1765   }
1766   {
1767     ethernet_address_change_ctx_t ctx = {
1768       .function = ip_neighbor_ethernet_change_mac,
1769       .function_opaque = 0,
1770     };
1771     vec_add1 (ethernet_main.address_change_callbacks, ctx);
1772   }
1773
1774   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1775
1776   ip_address_family_t af;
1777
1778   FOR_EACH_IP_ADDRESS_FAMILY (af)
1779     ip_neighbor_list_head[af] =
1780     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1781
1782   return (NULL);
1783 }
1784
1785 /* *INDENT-OFF* */
1786 VLIB_INIT_FUNCTION (ip_neighbor_init) =
1787 {
1788   .runs_after = VLIB_INITS("ip_main_init"),
1789 };
1790 /* *INDENT-ON* */
1791
1792 /*
1793  * fd.io coding-style-patch-verification: ON
1794  *
1795  * Local Variables:
1796  * eval: (c-set-style "gnu")
1797  * End:
1798  */