6ba191abaf6bcc893841922b035d12113d13328d
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
2  * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/ip/ip46_address.h>
27 #include <vnet/fib/fib_table.h>
28 #include <vnet/adj/adj_mcast.h>
29
30 /** Pool for All IP neighbors */
31 static ip_neighbor_t *ip_neighbor_pool;
32
33 /** protocol specific lists of time sorted neighbors */
34 index_t ip_neighbor_list_head[N_AF];
35
36 typedef struct ip_neighbor_elt_t_
37 {
38   clib_llist_anchor_t ipne_anchor;
39   index_t ipne_index;
40 } ip_neighbor_elt_t;
41
42 /** Pool of linked list elemeents */
43 ip_neighbor_elt_t *ip_neighbor_elt_pool;
44
45 typedef struct ip_neighbor_db_t_
46 {
47   /** per interface hash */
48   uword **ipndb_hash;
49   /** per-protocol limit - max number of neighbors*/
50   u32 ipndb_limit;
51   /** max age of a neighbor before it's forcibly evicted */
52   u32 ipndb_age;
53   /** when the limit is reached and new neighbors are created, should
54    * we recycle an old one */
55   bool ipndb_recycle;
56   /** per-protocol number of elements */
57   u32 ipndb_n_elts;
58   /** per-protocol number of elements per-fib-index*/
59   u32 *ipndb_n_elts_per_fib;
60 } ip_neighbor_db_t;
61
62 static vlib_log_class_t ipn_logger;
63
64 /* DBs of neighbours one per AF */
65 /* *INDENT-OFF* */
66 static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
67   [AF_IP4] = {
68     .ipndb_limit = 50000,
69     /* Default to not aging and not recycling */
70     .ipndb_age = 0,
71     .ipndb_recycle = false,
72   },
73   [AF_IP6] = {
74     .ipndb_limit = 50000,
75     /* Default to not aging and not recycling */
76     .ipndb_age = 0,
77     .ipndb_recycle = false,
78   }
79 };
80 /* *INDENT-ON* */
81
/*
 * Logging helpers for this module; both log against ipn_logger.
 *
 * The expansions deliberately carry no trailing ';' - the call site
 * supplies it - so that the macros behave like ordinary statements, e.g.
 * inside an un-braced if/else.
 */
#define IP_NEIGHBOR_DBG(...)                           \
    vlib_log_debug (ipn_logger, __VA_ARGS__)

#define IP_NEIGHBOR_INFO(...)                          \
    vlib_log_notice (ipn_logger, __VA_ARGS__)
87
88 ip_neighbor_t *
89 ip_neighbor_get (index_t ipni)
90 {
91   if (pool_is_free_index (ip_neighbor_pool, ipni))
92     return (NULL);
93
94   return (pool_elt_at_index (ip_neighbor_pool, ipni));
95 }
96
97 static index_t
98 ip_neighbor_get_index (const ip_neighbor_t * ipn)
99 {
100   return (ipn - ip_neighbor_pool);
101 }
102
103 static void
104 ip_neighbor_touch (ip_neighbor_t * ipn)
105 {
106   ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_STALE;
107 }
108
109 static bool
110 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
111 {
112   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
113 }
114
115 const ip_address_t *
116 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
117 {
118   return (&ipn->ipn_key->ipnk_ip);
119 }
120
121 ip_address_family_t
122 ip_neighbor_get_af (const ip_neighbor_t * ipn)
123 {
124   return (ip_addr_version (&ipn->ipn_key->ipnk_ip));
125 }
126
127 const mac_address_t *
128 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
129 {
130   return (&ipn->ipn_mac);
131 }
132
133 const u32
134 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
135 {
136   return (ipn->ipn_key->ipnk_sw_if_index);
137 }
138
139 static void
140 ip_neighbor_list_remove (ip_neighbor_t * ipn)
141 {
142   /* new neighbours, are added to the head of the list, since the
143    * list is time sorted, newest first */
144   ip_neighbor_elt_t *elt;
145
146   if (~0 != ipn->ipn_elt)
147     {
148       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
149
150       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
151
152       ipn->ipn_elt = ~0;
153     }
154 }
155
156 static void
157 ip_neighbor_refresh (ip_neighbor_t * ipn)
158 {
159   /* new neighbours, are added to the head of the list, since the
160    * list is time sorted, newest first */
161   ip_neighbor_elt_t *elt, *head;
162
163   ip_neighbor_touch (ipn);
164   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
165   ipn->ipn_n_probes = 0;
166
167   if (ip_neighbor_is_dynamic (ipn))
168     {
169       if (~0 == ipn->ipn_elt)
170         /* first time insertion */
171         pool_get_zero (ip_neighbor_elt_pool, elt);
172       else
173         {
174           /* already inserted - extract first */
175           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
176
177           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
178         }
179       head = pool_elt_at_index (ip_neighbor_elt_pool,
180                                 ip_neighbor_list_head[ip_neighbor_get_af
181                                                       (ipn)]);
182
183       elt->ipne_index = ip_neighbor_get_index (ipn);
184       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
185       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
186     }
187 }
188
189 static void
190 ip_neighbor_db_add (const ip_neighbor_t * ipn)
191 {
192   ip_address_family_t af;
193   u32 sw_if_index;
194
195   af = ip_neighbor_get_af (ipn);
196   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
197
198   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
199
200   if (!ip_neighbor_db[af].ipndb_hash[sw_if_index])
201     ip_neighbor_db[af].ipndb_hash[sw_if_index]
202       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
203
204   hash_set_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index],
205                 ipn->ipn_key, ip_neighbor_get_index (ipn));
206
207   ip_neighbor_db[af].ipndb_n_elts++;
208 }
209
210 static void
211 ip_neighbor_db_remove (const ip_neighbor_t * ipn)
212 {
213   ip_address_family_t af;
214   u32 sw_if_index;
215
216   af = ip_neighbor_get_af (ipn);
217   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
218
219   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
220
221   hash_unset_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index], ipn->ipn_key);
222
223   ip_neighbor_db[af].ipndb_n_elts--;
224 }
225
226 static ip_neighbor_t *
227 ip_neighbor_db_find (const ip_neighbor_key_t * key)
228 {
229   ip_address_family_t af;
230   uword *p;
231
232   af = ip_addr_version (&key->ipnk_ip);
233
234   if (key->ipnk_sw_if_index >= vec_len (ip_neighbor_db[af].ipndb_hash))
235     return NULL;
236
237   p = hash_get_mem (ip_neighbor_db[af].ipndb_hash
238                     [key->ipnk_sw_if_index], key);
239
240   if (p)
241     return ip_neighbor_get (p[0]);
242
243   return (NULL);
244 }
245
246 static u8
247 ip_af_type_pfx_len (ip_address_family_t type)
248 {
249   return (type == AF_IP4 ? 32 : 128);
250 }
251
252 static void
253 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
254 {
255   ip_address_family_t af;
256
257   af = ip_neighbor_get_af (ipn);
258
259   if (af == AF_IP6 &&
260       ip6_address_is_link_local_unicast (&ip_addr_v6
261                                          (&ipn->ipn_key->ipnk_ip)))
262     {
263       ip6_ll_prefix_t pfx = {
264         .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
265         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
266       };
267       ipn->ipn_fib_entry_index =
268         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
269     }
270   else
271     {
272       fib_protocol_t fproto;
273
274       fproto = ip_address_family_to_fib_proto (af);
275
276       fib_prefix_t pfx = {
277         .fp_len = ip_af_type_pfx_len (af),
278         .fp_proto = fproto,
279         .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
280       };
281
282       ipn->ipn_fib_entry_index =
283         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
284                                   FIB_ENTRY_FLAG_ATTACHED,
285                                   fib_proto_to_dpo (fproto),
286                                   &pfx.fp_addr,
287                                   ipn->ipn_key->ipnk_sw_if_index,
288                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
289
290       vec_validate (ip_neighbor_db[af].ipndb_n_elts_per_fib, fib_index);
291
292       ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]++;
293
294       if (1 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
295         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
296     }
297 }
298
299 static void
300 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
301 {
302   ip_address_family_t af;
303
304   af = ip_neighbor_get_af (ipn);
305
306   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
307     {
308       if (AF_IP6 == af &&
309           ip6_address_is_link_local_unicast (&ip_addr_v6
310                                              (&ipn->ipn_key->ipnk_ip)))
311         {
312           ip6_ll_prefix_t pfx = {
313             .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
314             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
315           };
316           ip6_ll_table_entry_delete (&pfx);
317         }
318       else
319         {
320           fib_protocol_t fproto;
321
322           fproto = ip_address_family_to_fib_proto (af);
323
324           fib_prefix_t pfx = {
325             .fp_len = ip_af_type_pfx_len (af),
326             .fp_proto = fproto,
327             .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
328           };
329
330           fib_table_entry_path_remove (fib_index,
331                                        &pfx,
332                                        FIB_SOURCE_ADJ,
333                                        fib_proto_to_dpo (fproto),
334                                        &pfx.fp_addr,
335                                        ipn->ipn_key->ipnk_sw_if_index,
336                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
337
338           ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]--;
339
340           if (0 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
341             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
342         }
343     }
344 }
345
346 static void
347 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
348 {
349   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
350                           ethernet_build_rewrite (vnet_get_main (),
351                                                   ipn->
352                                                   ipn_key->ipnk_sw_if_index,
353                                                   adj_get_link_type (ai),
354                                                   ipn->ipn_mac.bytes));
355 }
356
357 static void
358 ip_neighbor_mk_incomplete (adj_index_t ai)
359 {
360   ip_adjacency_t *adj = adj_get (ai);
361
362   adj_nbr_update_rewrite (ai,
363                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
364                           ethernet_build_rewrite (vnet_get_main (),
365                                                   adj->
366                                                   rewrite_header.sw_if_index,
367                                                   VNET_LINK_ARP,
368                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
369 }
370
371 static adj_walk_rc_t
372 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
373 {
374   ip_neighbor_t *ipn = ctx;
375
376   ip_neighbor_mk_complete (ai, ipn);
377
378   return (ADJ_WALK_RC_CONTINUE);
379 }
380
381 static adj_walk_rc_t
382 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
383 {
384   ip_neighbor_mk_incomplete (ai);
385
386   return (ADJ_WALK_RC_CONTINUE);
387 }
388
389 static void
390 ip_neighbor_destroy (ip_neighbor_t * ipn)
391 {
392   ip_address_family_t af;
393
394   af = ip_neighbor_get_af (ipn);
395
396   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
397                    ip_neighbor_get_index (ipn));
398
399   ip_neighbor_publish (ip_neighbor_get_index (ipn),
400                        IP_NEIGHBOR_EVENT_REMOVED);
401
402   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
403                    ip_address_family_to_fib_proto (af),
404                    &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
405                    ip_neighbor_mk_incomplete_walk, ipn);
406   ip_neighbor_adj_fib_remove
407     (ipn,
408      fib_table_get_index_for_sw_if_index
409      (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
410
411   ip_neighbor_list_remove (ipn);
412   ip_neighbor_db_remove (ipn);
413   clib_mem_free (ipn->ipn_key);
414
415   pool_put (ip_neighbor_pool, ipn);
416 }
417
418 static bool
419 ip_neighbor_force_reuse (ip_address_family_t af)
420 {
421   if (!ip_neighbor_db[af].ipndb_recycle)
422     return false;
423
424   /* pluck the oldest entry, which is the one from the end of the list */
425   ip_neighbor_elt_t *elt, *head;
426
427   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
428
429   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
430     return (false);
431
432   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
433   ip_neighbor_destroy (ip_neighbor_get (elt->ipne_index));
434
435   return (true);
436 }
437
438 static ip_neighbor_t *
439 ip_neighbor_alloc (const ip_neighbor_key_t * key,
440                    const mac_address_t * mac, ip_neighbor_flags_t flags)
441 {
442   ip_address_family_t af;
443   ip_neighbor_t *ipn;
444
445   af = ip_addr_version (&key->ipnk_ip);
446
447   if (ip_neighbor_db[af].ipndb_limit &&
448       (ip_neighbor_db[af].ipndb_n_elts >= ip_neighbor_db[af].ipndb_limit))
449     {
450       if (!ip_neighbor_force_reuse (af))
451         return (NULL);
452     }
453
454   pool_get_zero (ip_neighbor_pool, ipn);
455
456   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
457   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
458
459   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
460   ipn->ipn_flags = flags;
461   ipn->ipn_elt = ~0;
462
463   mac_address_copy (&ipn->ipn_mac, mac);
464
465   ip_neighbor_db_add (ipn);
466
467   /* create the adj-fib. the entry in the FIB table for the peer's interface */
468   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
469     ip_neighbor_adj_fib_add
470       (ipn, fib_table_get_index_for_sw_if_index
471        (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
472
473   return (ipn);
474 }
475
476 int
477 ip_neighbor_add (const ip_address_t * ip,
478                  const mac_address_t * mac,
479                  u32 sw_if_index,
480                  ip_neighbor_flags_t flags, u32 * stats_index)
481 {
482   fib_protocol_t fproto;
483   ip_neighbor_t *ipn;
484
485   /* main thread only */
486   ASSERT (0 == vlib_get_thread_index ());
487
488   fproto = ip_address_family_to_fib_proto (ip_addr_version (ip));
489
490   const ip_neighbor_key_t key = {
491     .ipnk_ip = *ip,
492     .ipnk_sw_if_index = sw_if_index,
493   };
494
495   ipn = ip_neighbor_db_find (&key);
496
497   if (ipn)
498     {
499       IP_NEIGHBOR_DBG ("update: %U, %U",
500                        format_vnet_sw_if_index_name, vnet_get_main (),
501                        sw_if_index, format_ip_address, ip,
502                        format_ip_neighbor_flags, flags, format_mac_address_t,
503                        mac);
504
505       ip_neighbor_touch (ipn);
506
507       /* Refuse to over-write static neighbor entry. */
508       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
509           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
510         {
511           /* if MAC address match, still check to send event */
512           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
513             goto check_customers;
514           return -2;
515         }
516
517       /* A dynamic entry can become static, but not vice-versa.
518        * i.e. since if it was programmed by the CP then it must
519        * be removed by the CP */
520       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
521           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
522         {
523           ip_neighbor_list_remove (ipn);
524           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
525           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
526         }
527
528       /*
529        * prevent a DoS attack from the data-plane that
530        * spams us with no-op updates to the MAC address
531        */
532       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
533         {
534           ip_neighbor_refresh (ipn);
535           goto check_customers;
536         }
537
538       mac_address_copy (&ipn->ipn_mac, mac);
539     }
540   else
541     {
542       IP_NEIGHBOR_INFO ("add: %U, %U",
543                         format_vnet_sw_if_index_name, vnet_get_main (),
544                         sw_if_index, format_ip_address, ip,
545                         format_ip_neighbor_flags, flags, format_mac_address_t,
546                         mac);
547
548       ipn = ip_neighbor_alloc (&key, mac, flags);
549
550       if (NULL == ipn)
551         return VNET_API_ERROR_LIMIT_EXCEEDED;
552     }
553
554   /* Update time stamp and flags. */
555   ip_neighbor_refresh (ipn);
556
557   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
558                    fproto, &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
559                    ip_neighbor_mk_complete_walk, ipn);
560
561 check_customers:
562   /* Customer(s) requesting event for this address? */
563   ip_neighbor_publish (ip_neighbor_get_index (ipn), IP_NEIGHBOR_EVENT_ADDED);
564
565   if (stats_index)
566     *stats_index = adj_nbr_find (fproto,
567                                  fib_proto_to_link (fproto),
568                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
569                                  ipn->ipn_key->ipnk_sw_if_index);
570   return 0;
571 }
572
573 int
574 ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index)
575 {
576   ip_neighbor_t *ipn;
577
578   /* main thread only */
579   ASSERT (0 == vlib_get_thread_index ());
580
581   IP_NEIGHBOR_INFO ("delete: %U, %U",
582                     format_vnet_sw_if_index_name, vnet_get_main (),
583                     sw_if_index, format_ip_address, ip);
584
585   const ip_neighbor_key_t key = {
586     .ipnk_ip = *ip,
587     .ipnk_sw_if_index = sw_if_index,
588   };
589
590   ipn = ip_neighbor_db_find (&key);
591
592   if (NULL == ipn)
593     return (VNET_API_ERROR_NO_SUCH_ENTRY);
594
595   ip_neighbor_destroy (ipn);
596
597   return (0);
598 }
599
600 typedef struct ip_neighbor_del_all_ctx_t_
601 {
602   index_t *ipn_del;
603 } ip_neighbor_del_all_ctx_t;
604
605 static walk_rc_t
606 ip_neighbor_del_all_walk_cb (index_t ipni, void *arg)
607 {
608   ip_neighbor_del_all_ctx_t *ctx = arg;
609
610   vec_add1 (ctx->ipn_del, ipni);
611
612   return (WALK_CONTINUE);
613 }
614
615 void
616 ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index)
617 {
618   IP_NEIGHBOR_INFO ("delete-all: %U, %U",
619                     format_ip_address_family, af,
620                     format_vnet_sw_if_index_name, vnet_get_main (),
621                     sw_if_index);
622
623   ip_neighbor_del_all_ctx_t ctx = {
624     .ipn_del = NULL,
625   };
626   index_t *ipni;
627
628   ip_neighbor_walk (af, sw_if_index, ip_neighbor_del_all_walk_cb, &ctx);
629
630   vec_foreach (ipni,
631                ctx.ipn_del) ip_neighbor_destroy (ip_neighbor_get (*ipni));
632   vec_free (ctx.ipn_del);
633 }
634
635 void
636 ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
637 {
638   ip_neighbor_t *ipn;
639   ip_adjacency_t *adj;
640
641   adj = adj_get (ai);
642
643   ip_neighbor_key_t key = {
644     .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
645   };
646
647   ip_address_from_46 (&adj->sub_type.nbr.next_hop,
648                       adj->ia_nh_proto, &key.ipnk_ip);
649
650   ipn = ip_neighbor_db_find (&key);
651
652   switch (adj->lookup_next_index)
653     {
654     case IP_LOOKUP_NEXT_ARP:
655       if (NULL != ipn)
656         {
657           adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
658                            adj->ia_nh_proto,
659                            &adj->sub_type.nbr.next_hop,
660                            ip_neighbor_mk_complete_walk, ipn);
661         }
662       else
663         {
664           /*
665            * no matching ARP entry.
666            * construct the rewrite required to for an ARP packet, and stick
667            * that in the adj's pipe to smoke.
668            */
669           adj_nbr_update_rewrite
670             (ai,
671              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
672              ethernet_build_rewrite
673              (vnm,
674               adj->rewrite_header.sw_if_index,
675               VNET_LINK_ARP,
676               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
677
678           /*
679            * since the FIB has added this adj for a route, it makes sense it
680            * may want to forward traffic sometime soon. Let's send a
681            * speculative ARP. just one. If we were to do periodically that
682            * wouldn't be bad either, but that's more code than i'm prepared to
683            * write at this time for relatively little reward.
684            */
685           /*
686            * adj_nbr_update_rewrite may actually call fib_walk_sync.
687            * fib_walk_sync may allocate a new adjacency and potentially cause
688            * a realloc for adj_pool. When that happens, adj pointer is no
689            * longer valid here.x We refresh adj pointer accordingly.
690            */
691           adj = adj_get (ai);
692           ip_neighbor_probe (adj);
693         }
694       break;
695     case IP_LOOKUP_NEXT_REWRITE:
696       /* Update of an existing rewrite adjacency happens e.g. when the
697        * interface's MAC address changes */
698       if (NULL != ipn)
699         ip_neighbor_mk_complete (ai, ipn);
700       break;
701     case IP_LOOKUP_NEXT_GLEAN:
702     case IP_LOOKUP_NEXT_BCAST:
703     case IP_LOOKUP_NEXT_MCAST:
704     case IP_LOOKUP_NEXT_DROP:
705     case IP_LOOKUP_NEXT_PUNT:
706     case IP_LOOKUP_NEXT_LOCAL:
707     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
708     case IP_LOOKUP_NEXT_MIDCHAIN:
709     case IP_LOOKUP_NEXT_ICMP_ERROR:
710     case IP_LOOKUP_N_NEXT:
711       ASSERT (0);
712       break;
713     }
714 }
715
716 void
717 ip_neighbor_learn (const ip_neighbor_learn_t * l)
718 {
719   ip_neighbor_add (&l->ip, &l->mac, l->sw_if_index,
720                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
721 }
722
723 static clib_error_t *
724 ip_neighbor_cmd (vlib_main_t * vm,
725                  unformat_input_t * input, vlib_cli_command_t * cmd)
726 {
727   ip_address_t ip = IP_ADDRESS_V6_ALL_0S;
728   mac_address_t mac = ZERO_MAC_ADDRESS;
729   vnet_main_t *vnm = vnet_get_main ();
730   ip_neighbor_flags_t flags;
731   u32 sw_if_index = ~0;
732   int is_add = 1;
733   int count = 1;
734
735   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
736
737   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
738     {
739       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
740       if (unformat (input, "%U %U %U",
741                     unformat_vnet_sw_interface, vnm, &sw_if_index,
742                     unformat_ip_address, &ip, unformat_mac_address_t, &mac))
743         ;
744       else if (unformat (input, "delete") || unformat (input, "del"))
745         is_add = 0;
746       else if (unformat (input, "static"))
747         {
748           flags |= IP_NEIGHBOR_FLAG_STATIC;
749           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
750         }
751       else if (unformat (input, "no-fib-entry"))
752         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
753       else if (unformat (input, "count %d", &count))
754         ;
755       else
756         break;
757     }
758
759   if (sw_if_index == ~0 ||
760       ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
761     return clib_error_return (0,
762                               "specify interface, IP address and MAC: `%U'",
763                               format_unformat_error, input);
764
765   while (count)
766     {
767       if (is_add)
768         ip_neighbor_add (&ip, &mac, sw_if_index, flags, NULL);
769       else
770         ip_neighbor_del (&ip, sw_if_index);
771
772       ip_address_increment (&ip);
773       mac_address_increment (&mac);
774
775       --count;
776     }
777
778   return NULL;
779 }
780
781 /* *INDENT-OFF* */
782 /*?
783  * Add or delete IPv4 ARP cache entries.
784  *
785  * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
786  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
787  * any order and combination.
788  *
789  * @cliexpar
790  * @parblock
791  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
792  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
793  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
794  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
795  *
796  * To add or delete an IPv4 ARP cache entry to or from a specific fib
797  * table:
798  * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
799  * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
800  *
801  * Add or delete IPv4 static ARP cache entries as follows:
802  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
803  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
804  *
805  * For testing / debugging purposes, the 'set ip neighbor' command can add or
806  * delete multiple entries. Supply the 'count N' parameter:
807  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
808  * @endparblock
809  ?*/
810 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
811   .path = "set ip neighbor",
812   .short_help =
813   "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
814   .function = ip_neighbor_cmd,
815 };
816 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
817   .path = "ip neighbor",
818   .short_help =
819   "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
820   .function = ip_neighbor_cmd,
821 };
822 /* *INDENT-ON* */
823
824 static int
825 ip_neighbor_sort (void *a1, void *a2)
826 {
827   index_t *ipni1 = a1, *ipni2 = a2;
828   ip_neighbor_t *ipn1, *ipn2;
829   int cmp;
830
831   ipn1 = ip_neighbor_get (*ipni1);
832   ipn2 = ip_neighbor_get (*ipni2);
833
834   cmp = vnet_sw_interface_compare (vnet_get_main (),
835                                    ipn1->ipn_key->ipnk_sw_if_index,
836                                    ipn2->ipn_key->ipnk_sw_if_index);
837   if (!cmp)
838     cmp = ip_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
839   return cmp;
840 }
841
842 static index_t *
843 ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
844 {
845   index_t *ipnis = NULL;
846   ip_neighbor_t *ipn;
847
848   /* *INDENT-OFF* */
849   pool_foreach (ipn, ip_neighbor_pool)
850    {
851     if ((sw_if_index == ~0 ||
852         ipn->ipn_key->ipnk_sw_if_index == sw_if_index) &&
853         (N_AF == af ||
854          ip_neighbor_get_af(ipn) == af))
855        vec_add1 (ipnis, ip_neighbor_get_index(ipn));
856   }
857
858   /* *INDENT-ON* */
859
860   if (ipnis)
861     vec_sort_with_function (ipnis, ip_neighbor_sort);
862   return ipnis;
863 }
864
865 static clib_error_t *
866 ip_neighbor_show_sorted_i (vlib_main_t * vm,
867                            unformat_input_t * input,
868                            vlib_cli_command_t * cmd, ip_address_family_t af)
869 {
870   ip_neighbor_elt_t *elt, *head;
871
872   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
873
874
875   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
876                    "Flags", "Ethernet", "Interface");
877
878   /* *INDENT-OFF*/
879   /* the list is time sorted, newest first, so start from the back
880    * and work forwards. Stop when we get to one that is alive */
881   clib_llist_foreach_reverse(ip_neighbor_elt_pool,
882                              ipne_anchor, head, elt,
883   ({
884     vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
885   }));
886   /* *INDENT-ON*/
887
888   return (NULL);
889 }
890
891 static clib_error_t *
892 ip_neighbor_show_i (vlib_main_t * vm,
893                     unformat_input_t * input,
894                     vlib_cli_command_t * cmd, ip_address_family_t af)
895 {
896   index_t *ipni, *ipnis = NULL;
897   u32 sw_if_index;
898
899   /* Filter entries by interface if given. */
900   sw_if_index = ~0;
901   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
902                         &sw_if_index);
903
904   ipnis = ip_neighbor_entries (sw_if_index, af);
905
906   if (ipnis)
907     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
908                      "Flags", "Ethernet", "Interface");
909
910   vec_foreach (ipni, ipnis)
911   {
912     vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
913   }
914   vec_free (ipnis);
915
916   return (NULL);
917 }
918
919 static clib_error_t *
920 ip_neighbor_show (vlib_main_t * vm,
921                   unformat_input_t * input, vlib_cli_command_t * cmd)
922 {
923   return (ip_neighbor_show_i (vm, input, cmd, N_AF));
924 }
925
926 static clib_error_t *
927 ip6_neighbor_show (vlib_main_t * vm,
928                    unformat_input_t * input, vlib_cli_command_t * cmd)
929 {
930   return (ip_neighbor_show_i (vm, input, cmd, AF_IP6));
931 }
932
933 static clib_error_t *
934 ip4_neighbor_show (vlib_main_t * vm,
935                    unformat_input_t * input, vlib_cli_command_t * cmd)
936 {
937   return (ip_neighbor_show_i (vm, input, cmd, AF_IP4));
938 }
939
940 static clib_error_t *
941 ip6_neighbor_show_sorted (vlib_main_t * vm,
942                           unformat_input_t * input, vlib_cli_command_t * cmd)
943 {
944   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP6));
945 }
946
947 static clib_error_t *
948 ip4_neighbor_show_sorted (vlib_main_t * vm,
949                           unformat_input_t * input, vlib_cli_command_t * cmd)
950 {
951   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP4));
952 }
953
954 /*?
955  * Display all the IP neighbor entries.
956  *
957  * @cliexpar
958  * Example of how to display the IPv4 ARP table:
959  * @cliexstart{show ip neighbor}
960  *    Time      FIB        IP4       Flags      Ethernet              Interface
961  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
962  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
963  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
964  * Proxy arps enabled for:
965  * Fib_index 0   6.0.0.1 - 6.0.0.11
966  * @cliexend
967  ?*/
968 /* *INDENT-OFF* */
969 VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
970   .path = "show ip neighbors",
971   .function = ip_neighbor_show,
972   .short_help = "show ip neighbors [interface]",
973 };
974 VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
975   .path = "show ip4 neighbors",
976   .function = ip4_neighbor_show,
977   .short_help = "show ip4 neighbors [interface]",
978 };
979 VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
980   .path = "show ip6 neighbors",
981   .function = ip6_neighbor_show,
982   .short_help = "show ip6 neighbors [interface]",
983 };
984 VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
985   .path = "show ip neighbor",
986   .function = ip_neighbor_show,
987   .short_help = "show ip neighbor [interface]",
988 };
989 VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
990   .path = "show ip4 neighbor",
991   .function = ip4_neighbor_show,
992   .short_help = "show ip4 neighbor [interface]",
993 };
994 VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
995   .path = "show ip6 neighbor",
996   .function = ip6_neighbor_show,
997   .short_help = "show ip6 neighbor [interface]",
998 };
999 VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
1000   .path = "show ip4 neighbor-sorted",
1001   .function = ip4_neighbor_show_sorted,
1002   .short_help = "show ip4 neighbor-sorted",
1003 };
1004 VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
1005   .path = "show ip6 neighbor-sorted",
1006   .function = ip6_neighbor_show_sorted,
1007   .short_help = "show ip6 neighbor-sorted",
1008 };
1009 /* *INDENT-ON* */
1010
1011 static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
1012
1013 void
1014 ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
1015 {
1016   ip_nbr_vfts[af] = *vft;
1017 }
1018
1019 void
1020 ip_neighbor_probe_dst (u32 sw_if_index,
1021                        ip_address_family_t af, const ip46_address_t * dst)
1022 {
1023   if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
1024     return;
1025
1026   switch (af)
1027     {
1028     case AF_IP6:
1029       ip6_neighbor_probe_dst (sw_if_index, &dst->ip6);
1030       break;
1031     case AF_IP4:
1032       ip4_neighbor_probe_dst (sw_if_index, &dst->ip4);
1033       break;
1034     }
1035 }
1036
1037 void
1038 ip_neighbor_probe (const ip_adjacency_t * adj)
1039 {
1040   ip_neighbor_probe_dst (adj->rewrite_header.sw_if_index,
1041                          ip_address_family_from_fib_proto (adj->ia_nh_proto),
1042                          &adj->sub_type.nbr.next_hop);
1043 }
1044
1045 void
1046 ip_neighbor_walk (ip_address_family_t af,
1047                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
1048 {
1049   ip_neighbor_key_t *key;
1050   index_t ipni;
1051
1052   if (~0 == sw_if_index)
1053     {
1054       uword **hash;
1055
1056       vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
1057       {
1058           /* *INDENT-OFF* */
1059           hash_foreach (key, ipni, *hash,
1060           ({
1061             if (WALK_STOP == cb (ipni, ctx))
1062               break;
1063           }));
1064           /* *INDENT-ON* */
1065       }
1066     }
1067   else
1068     {
1069       uword *hash;
1070
1071       if (vec_len (ip_neighbor_db[af].ipndb_hash) <= sw_if_index)
1072         return;
1073       hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
1074
1075       /* *INDENT-OFF* */
1076       hash_foreach (key, ipni, hash,
1077       ({
1078         if (WALK_STOP == cb (ipni, ctx))
1079           break;
1080       }));
1081       /* *INDENT-ON* */
1082     }
1083 }
1084
1085 int
1086 ip4_neighbor_proxy_add (u32 fib_index,
1087                         const ip4_address_t * start,
1088                         const ip4_address_t * end)
1089 {
1090   if (ip_nbr_vfts[AF_IP4].inv_proxy4_add)
1091     {
1092       return (ip_nbr_vfts[AF_IP4].inv_proxy4_add (fib_index, start, end));
1093     }
1094
1095   return (-1);
1096 }
1097
1098 int
1099 ip4_neighbor_proxy_delete (u32 fib_index,
1100                            const ip4_address_t * start,
1101                            const ip4_address_t * end)
1102 {
1103   if (ip_nbr_vfts[AF_IP4].inv_proxy4_del)
1104     {
1105       return (ip_nbr_vfts[AF_IP4].inv_proxy4_del (fib_index, start, end));
1106     }
1107   return -1;
1108 }
1109
1110 int
1111 ip4_neighbor_proxy_enable (u32 sw_if_index)
1112 {
1113   if (ip_nbr_vfts[AF_IP4].inv_proxy4_enable)
1114     {
1115       return (ip_nbr_vfts[AF_IP4].inv_proxy4_enable (sw_if_index));
1116     }
1117   return -1;
1118 }
1119
1120 int
1121 ip4_neighbor_proxy_disable (u32 sw_if_index)
1122 {
1123   if (ip_nbr_vfts[AF_IP4].inv_proxy4_disable)
1124     {
1125       return (ip_nbr_vfts[AF_IP4].inv_proxy4_disable (sw_if_index));
1126     }
1127   return -1;
1128 }
1129
1130 int
1131 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1132 {
1133   if (ip_nbr_vfts[AF_IP6].inv_proxy6_add)
1134     {
1135       return (ip_nbr_vfts[AF_IP6].inv_proxy6_add (sw_if_index, addr));
1136     }
1137   return -1;
1138 }
1139
1140 int
1141 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1142 {
1143   if (ip_nbr_vfts[AF_IP6].inv_proxy6_del)
1144     {
1145       return (ip_nbr_vfts[AF_IP6].inv_proxy6_del (sw_if_index, addr));
1146     }
1147   return -1;
1148 }
1149
1150 void
1151 ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
1152 {
1153   index_t *ipnis = NULL, *ipni;
1154   ip_neighbor_t *ipn;
1155
1156   IP_NEIGHBOR_DBG ("populate: %U %U",
1157                    format_vnet_sw_if_index_name, vnet_get_main (),
1158                    sw_if_index, format_ip_address_family, af);
1159
1160   /* *INDENT-OFF* */
1161   pool_foreach (ipn, ip_neighbor_pool)
1162    {
1163     if (ip_neighbor_get_af(ipn) == af &&
1164         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1165       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1166   }
1167   /* *INDENT-ON* */
1168
1169   vec_foreach (ipni, ipnis)
1170   {
1171     ipn = ip_neighbor_get (*ipni);
1172
1173     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1174                      ip_address_family_to_fib_proto (ip_neighbor_get_af
1175                                                      (ipn)),
1176                      &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
1177                      ip_neighbor_mk_complete_walk, ipn);
1178   }
1179   vec_free (ipnis);
1180 }
1181
1182 void
1183 ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
1184 {
1185   index_t *ipnis = NULL, *ipni;
1186   ip_neighbor_t *ipn;
1187
1188
1189   IP_NEIGHBOR_DBG ("flush: %U %U",
1190                    format_vnet_sw_if_index_name, vnet_get_main (),
1191                    sw_if_index, format_ip_address_family, af);
1192
1193   /* *INDENT-OFF* */
1194   pool_foreach (ipn, ip_neighbor_pool)
1195    {
1196     if (ip_neighbor_get_af(ipn) == af &&
1197         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1198         ip_neighbor_is_dynamic (ipn))
1199       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1200   }
1201   /* *INDENT-ON* */
1202
1203   vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
1204   vec_free (ipnis);
1205 }
1206
1207 walk_rc_t
1208 ip_neighbor_mark_one (index_t ipni, void *ctx)
1209 {
1210   ip_neighbor_t *ipn;
1211
1212   ipn = ip_neighbor_get (ipni);
1213
1214   ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STALE;
1215
1216   return (WALK_CONTINUE);
1217 }
1218
1219 void
1220 ip_neighbor_mark (ip_address_family_t af)
1221 {
1222   ip_neighbor_walk (af, ~0, ip_neighbor_mark_one, NULL);
1223 }
1224
1225 typedef struct ip_neighbor_sweep_ctx_t_
1226 {
1227   index_t *ipnsc_stale;
1228 } ip_neighbor_sweep_ctx_t;
1229
1230 static walk_rc_t
1231 ip_neighbor_sweep_one (index_t ipni, void *arg)
1232 {
1233   ip_neighbor_sweep_ctx_t *ctx = arg;
1234   ip_neighbor_t *ipn;
1235
1236   ipn = ip_neighbor_get (ipni);
1237
1238   if (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STALE)
1239     {
1240       vec_add1 (ctx->ipnsc_stale, ipni);
1241     }
1242
1243   return (WALK_CONTINUE);
1244 }
1245
1246 void
1247 ip_neighbor_sweep (ip_address_family_t af)
1248 {
1249   ip_neighbor_sweep_ctx_t ctx = { };
1250   index_t *ipni;
1251
1252   ip_neighbor_walk (af, ~0, ip_neighbor_sweep_one, &ctx);
1253
1254   vec_foreach (ipni, ctx.ipnsc_stale)
1255   {
1256     ip_neighbor_destroy (ip_neighbor_get (*ipni));
1257   }
1258   vec_free (ctx.ipnsc_stale);
1259 }
1260
1261 /*
1262  * Remove any arp entries associated with the specified interface
1263  */
1264 static clib_error_t *
1265 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1266                                     u32 sw_if_index, u32 flags)
1267 {
1268   ip_address_family_t af;
1269
1270   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1271                    format_vnet_sw_if_index_name, vnet_get_main (),
1272                    sw_if_index,
1273                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1274
1275   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1276     {
1277       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_populate (af, sw_if_index);
1278     }
1279   else
1280     {
1281       /* admin down, flush all neighbours */
1282       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1283     }
1284
1285   return (NULL);
1286 }
1287
1288 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1289
1290 /*
1291  * Remove any arp entries associated with the specified interface
1292  */
1293 static clib_error_t *
1294 ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
1295                                  u32 sw_if_index, u32 is_add)
1296 {
1297   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1298                    format_vnet_sw_if_index_name, vnet_get_main (),
1299                    sw_if_index, (is_add ? "add" : "del"));
1300
1301   if (!is_add && sw_if_index != ~0)
1302     {
1303       ip_address_family_t af;
1304
1305       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1306     }
1307
1308   return (NULL);
1309 }
1310
1311 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
1312
1313 typedef struct ip_neighbor_walk_covered_ctx_t_
1314 {
1315   ip_address_t addr;
1316   u32 length;
1317   index_t *ipnis;
1318 } ip_neighbor_walk_covered_ctx_t;
1319
1320 static walk_rc_t
1321 ip_neighbor_walk_covered (index_t ipni, void *arg)
1322 {
1323   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1324   ip_neighbor_t *ipn;
1325
1326   ipn = ip_neighbor_get (ipni);
1327
1328   if (AF_IP4 == ip_addr_version (&ctx->addr))
1329     {
1330       if (ip4_destination_matches_route (&ip4_main,
1331                                          &ip_addr_v4 (&ipn->ipn_key->ipnk_ip),
1332                                          &ip_addr_v4 (&ctx->addr),
1333                                          ctx->length) &&
1334           ip_neighbor_is_dynamic (ipn))
1335         {
1336           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1337         }
1338     }
1339   else if (AF_IP6 == ip_addr_version (&ctx->addr))
1340     {
1341       if (ip6_destination_matches_route (&ip6_main,
1342                                          &ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
1343                                          &ip_addr_v6 (&ctx->addr),
1344                                          ctx->length) &&
1345           ip_neighbor_is_dynamic (ipn))
1346         {
1347           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1348         }
1349     }
1350   return (WALK_CONTINUE);
1351 }
1352
1353
1354 /*
1355  * callback when an interface address is added or deleted
1356  */
1357 static void
1358 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1359                                           uword opaque,
1360                                           u32 sw_if_index,
1361                                           ip4_address_t * address,
1362                                           u32 address_length,
1363                                           u32 if_address_index, u32 is_del)
1364 {
1365   /*
1366    * Flush the ARP cache of all entries covered by the address
1367    * that is being removed.
1368    */
1369   IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
1370                    (is_del ? "del" : "add"),
1371                    format_vnet_sw_if_index_name, vnet_get_main (),
1372                    sw_if_index, format_ip4_address, address, address_length);
1373
1374   if (is_del)
1375     {
1376       /* *INDENT-OFF* */
1377       ip_neighbor_walk_covered_ctx_t ctx = {
1378         .addr = {
1379           .ip.ip4 = *address,
1380           .version = AF_IP4,
1381         },
1382         .length = address_length,
1383       };
1384       /* *INDENT-ON* */
1385       index_t *ipni;
1386
1387       ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
1388
1389       vec_foreach (ipni, ctx.ipnis)
1390         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1391
1392       vec_free (ctx.ipnis);
1393     }
1394 }
1395
1396 /*
1397  * callback when an interface address is added or deleted
1398  */
1399 static void
1400 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1401                                           uword opaque,
1402                                           u32 sw_if_index,
1403                                           ip6_address_t * address,
1404                                           u32 address_length,
1405                                           u32 if_address_index, u32 is_del)
1406 {
1407   /*
1408    * Flush the ARP cache of all entries covered by the address
1409    * that is being removed.
1410    */
1411   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1412                    format_vnet_sw_if_index_name, vnet_get_main (),
1413                    sw_if_index, format_ip6_address, address, address_length,
1414                    (is_del ? "del" : "add"));
1415
1416   if (is_del)
1417     {
1418       /* *INDENT-OFF* */
1419       ip_neighbor_walk_covered_ctx_t ctx = {
1420         .addr = {
1421           .ip.ip6 = *address,
1422           .version = AF_IP6,
1423         },
1424         .length = address_length,
1425       };
1426       /* *INDENT-ON* */
1427       index_t *ipni;
1428
1429       ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
1430
1431       vec_foreach (ipni, ctx.ipnis)
1432         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1433
1434       vec_free (ctx.ipnis);
1435     }
1436 }
1437
1438 typedef struct ip_neighbor_table_bind_ctx_t_
1439 {
1440   u32 new_fib_index;
1441   u32 old_fib_index;
1442 } ip_neighbor_table_bind_ctx_t;
1443
1444 static walk_rc_t
1445 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1446 {
1447   ip_neighbor_table_bind_ctx_t *ctx = arg;
1448   ip_neighbor_t *ipn;
1449
1450   ipn = ip_neighbor_get (ipni);
1451   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1452   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1453
1454   return (WALK_CONTINUE);
1455 }
1456
1457 static void
1458 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1459                            uword opaque,
1460                            u32 sw_if_index,
1461                            u32 new_fib_index, u32 old_fib_index)
1462 {
1463   ip_neighbor_table_bind_ctx_t ctx = {
1464     .old_fib_index = old_fib_index,
1465     .new_fib_index = new_fib_index,
1466   };
1467
1468   ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1469 }
1470
1471 static void
1472 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1473                            uword opaque,
1474                            u32 sw_if_index,
1475                            u32 new_fib_index, u32 old_fib_index)
1476 {
1477   ip_neighbor_table_bind_ctx_t ctx = {
1478     .old_fib_index = old_fib_index,
1479     .new_fib_index = new_fib_index,
1480   };
1481
1482   ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1483 }
1484
1485 typedef enum ip_neighbor_age_state_t_
1486 {
1487   IP_NEIGHBOR_AGE_ALIVE,
1488   IP_NEIGHBOR_AGE_PROBE,
1489   IP_NEIGHBOR_AGE_DEAD,
1490 } ip_neighbor_age_state_t;
1491
1492 #define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1493
1494 static ip_neighbor_age_state_t
1495 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1496 {
1497   ip_address_family_t af;
1498   ip_neighbor_t *ipn;
1499   u32 ipndb_age;
1500   u32 ttl;
1501
1502   ipn = ip_neighbor_get (ipni);
1503   af = ip_neighbor_get_af (ipn);
1504   ipndb_age = ip_neighbor_db[af].ipndb_age;
1505   ttl = now - ipn->ipn_time_last_updated;
1506   *wait = ipndb_age;
1507
1508   if (ttl > ipndb_age)
1509     {
1510       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
1511                        format_ip_neighbor, ipni, now,
1512                        ipn->ipn_time_last_updated, ipndb_age);
1513       if (ipn->ipn_n_probes > 2)
1514         {
1515           /* 3 strikes and yea-re out */
1516           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
1517           *wait = 1;
1518           return (IP_NEIGHBOR_AGE_DEAD);
1519         }
1520       else
1521         {
1522           ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
1523                                  af, &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
1524
1525           ipn->ipn_n_probes++;
1526           *wait = 1;
1527         }
1528     }
1529   else
1530     {
1531       /* here we are sure that ttl <= ipndb_age */
1532       *wait = ipndb_age - ttl + 1;
1533       return (IP_NEIGHBOR_AGE_ALIVE);
1534     }
1535
1536   return (IP_NEIGHBOR_AGE_PROBE);
1537 }
1538
1539 typedef enum ip_neighbor_process_event_t_
1540 {
1541   IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
1542 } ip_neighbor_process_event_t;
1543
1544 static uword
1545 ip_neighbor_age_loop (vlib_main_t * vm,
1546                       vlib_node_runtime_t * rt,
1547                       vlib_frame_t * f, ip_address_family_t af)
1548 {
1549   uword event_type, *event_data = NULL;
1550   f64 timeout;
1551
1552   /* Set the timeout to an effectively infinite value when the process starts */
1553   timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;
1554
1555   while (1)
1556     {
1557       f64 now;
1558
1559       if (!timeout)
1560         vlib_process_wait_for_event (vm);
1561       else
1562         vlib_process_wait_for_event_or_clock (vm, timeout);
1563
1564       event_type = vlib_process_get_events (vm, &event_data);
1565       vec_reset_length (event_data);
1566
1567       now = vlib_time_now (vm);
1568
1569       switch (event_type)
1570         {
1571         case ~0:
1572           {
1573             /* timer expired */
1574             ip_neighbor_elt_t *elt, *head;
1575             f64 wait;
1576
1577             timeout = ip_neighbor_db[af].ipndb_age;
1578             head = pool_elt_at_index (ip_neighbor_elt_pool,
1579                                       ip_neighbor_list_head[af]);
1580
1581           /* *INDENT-OFF*/
1582           /* the list is time sorted, newest first, so start from the back
1583            * and work forwards. Stop when we get to one that is alive */
1584           restart:
1585           clib_llist_foreach_reverse(ip_neighbor_elt_pool,
1586                                      ipne_anchor, head, elt,
1587           ({
1588             ip_neighbor_age_state_t res;
1589
1590             res = ip_neighbour_age_out(elt->ipne_index, now, &wait);
1591
1592             if (IP_NEIGHBOR_AGE_ALIVE == res) {
1593               /* the oldest neighbor has not yet expired, go back to sleep */
1594               timeout = clib_min (wait, timeout);
1595               break;
1596             }
1597             else if (IP_NEIGHBOR_AGE_DEAD == res) {
1598               /* the oldest neighbor is dead, pop it, then restart the walk
1599                * again from the back */
1600               ip_neighbor_destroy (ip_neighbor_get(elt->ipne_index));
1601               goto restart;
1602             }
1603
1604             timeout = clib_min (wait, timeout);
1605           }));
1606           /* *INDENT-ON* */
1607             break;
1608           }
1609         case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
1610           {
1611
1612             if (!ip_neighbor_db[af].ipndb_age)
1613               {
1614                 /* aging has been disabled */
1615                 timeout = 0;
1616                 break;
1617               }
1618             ip_neighbor_elt_t *elt, *head;
1619
1620             head = pool_elt_at_index (ip_neighbor_elt_pool,
1621                                       ip_neighbor_list_head[af]);
1622             /* no neighbors yet */
1623             if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
1624               {
1625                 timeout = ip_neighbor_db[af].ipndb_age;
1626                 break;
1627               }
1628
1629             /* poke the oldset neighbour for aging, which returns how long we sleep for */
1630             elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
1631             ip_neighbour_age_out (elt->ipne_index, now, &timeout);
1632             break;
1633           }
1634         }
1635     }
1636   return 0;
1637 }
1638
1639 static uword
1640 ip4_neighbor_age_process (vlib_main_t * vm,
1641                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1642 {
1643   return (ip_neighbor_age_loop (vm, rt, f, AF_IP4));
1644 }
1645
1646 static uword
1647 ip6_neighbor_age_process (vlib_main_t * vm,
1648                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1649 {
1650   return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
1651 }
1652
1653 /* *INDENT-OFF* */
1654 VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
1655   .function = ip4_neighbor_age_process,
1656   .type = VLIB_NODE_TYPE_PROCESS,
1657   .name = "ip4-neighbor-age-process",
1658 };
1659 VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
1660   .function = ip6_neighbor_age_process,
1661   .type = VLIB_NODE_TYPE_PROCESS,
1662   .name = "ip6-neighbor-age-process",
1663 };
1664 /* *INDENT-ON* */
1665
1666 int
1667 ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
1668 {
1669   ip_neighbor_db[af].ipndb_limit = limit;
1670   ip_neighbor_db[af].ipndb_recycle = recycle;
1671   ip_neighbor_db[af].ipndb_age = age;
1672
1673   vlib_process_signal_event (vlib_get_main (),
1674                              (AF_IP4 == af ?
1675                               ip4_neighbor_age_process_node.index :
1676                               ip6_neighbor_age_process_node.index),
1677                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1678
1679   return (0);
1680 }
1681
1682 static clib_error_t *
1683 ip_neighbor_config_show (vlib_main_t * vm,
1684                          unformat_input_t * input, vlib_cli_command_t * cmd)
1685 {
1686   ip_address_family_t af;
1687
1688   /* *INDENT-OFF* */
1689   FOR_EACH_IP_ADDRESS_FAMILY(af) {
1690     vlib_cli_output (vm, "%U:", format_ip_address_family, af);
1691     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1692                      ip_neighbor_db[af].ipndb_limit,
1693                      ip_neighbor_db[af].ipndb_age,
1694                      ip_neighbor_db[af].ipndb_recycle);
1695   }
1696
1697   /* *INDENT-ON* */
1698   return (NULL);
1699 }
1700
1701 static clib_error_t *
1702 ip_neighbor_config_set (vlib_main_t *vm, unformat_input_t *input,
1703                         vlib_cli_command_t *cmd)
1704 {
1705   unformat_input_t _line_input, *line_input = &_line_input;
1706   clib_error_t *error = NULL;
1707   ip_address_family_t af;
1708   u32 limit, age;
1709   bool recycle;
1710
1711   if (!unformat_user (input, unformat_line_input, line_input))
1712     return 0;
1713
1714   if (!unformat (line_input, "%U", unformat_ip_address_family, &af))
1715     {
1716       error = unformat_parse_error (line_input);
1717       goto done;
1718     }
1719
1720   limit = ip_neighbor_db[af].ipndb_limit;
1721   age = ip_neighbor_db[af].ipndb_age;
1722   recycle = ip_neighbor_db[af].ipndb_recycle;
1723
1724   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1725     {
1726       if (unformat (line_input, "limit %u", &limit))
1727         ;
1728       else if (unformat (line_input, "age %u", &age))
1729         ;
1730       else if (unformat (line_input, "recycle"))
1731         recycle = true;
1732       else if (unformat (line_input, "norecycle"))
1733         recycle = false;
1734       else
1735         {
1736           error = unformat_parse_error (line_input);
1737           goto done;
1738         }
1739     }
1740
1741   ip_neighbor_config (af, limit, age, recycle);
1742
1743 done:
1744   unformat_free (line_input);
1745   return error;
1746 }
1747
1748 /* *INDENT-OFF* */
1749 VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
1750   .path = "show ip neighbor-config",
1751   .function = ip_neighbor_config_show,
1752   .short_help = "show ip neighbor-config",
1753 };
1754 VLIB_CLI_COMMAND (set_ip_neighbor_cfg_cmd_node, static) = {
1755   .path = "set ip neighbor-config",
1756   .function = ip_neighbor_config_set,
1757   .short_help = "set ip neighbor-config ip4|ip6 [limit <limit>] [age <age>] "
1758                 "[recycle|norecycle]",
1759 };
1760 /* *INDENT-ON* */
1761
1762 static clib_error_t *
1763 ip_neighbor_init (vlib_main_t * vm)
1764 {
1765   {
1766     ip4_add_del_interface_address_callback_t cb = {
1767       .function = ip_neighbor_add_del_interface_address_v4,
1768     };
1769     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1770   }
1771   {
1772     ip6_add_del_interface_address_callback_t cb = {
1773       .function = ip_neighbor_add_del_interface_address_v6,
1774     };
1775     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1776   }
1777   {
1778     ip4_table_bind_callback_t cb = {
1779       .function = ip_neighbor_table_bind_v4,
1780     };
1781     vec_add1 (ip4_main.table_bind_callbacks, cb);
1782   }
1783   {
1784     ip6_table_bind_callback_t cb = {
1785       .function = ip_neighbor_table_bind_v6,
1786     };
1787     vec_add1 (ip6_main.table_bind_callbacks, cb);
1788   }
1789   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1790
1791   ip_address_family_t af;
1792
1793   FOR_EACH_IP_ADDRESS_FAMILY (af)
1794     ip_neighbor_list_head[af] =
1795     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1796
1797   return (NULL);
1798 }
1799
1800 /* *INDENT-OFF* */
1801 VLIB_INIT_FUNCTION (ip_neighbor_init) =
1802 {
1803   .runs_after = VLIB_INITS("ip_main_init"),
1804 };
1805 /* *INDENT-ON* */
1806
1807 /*
1808  * fd.io coding-style-patch-verification: ON
1809  *
1810  * Local Variables:
1811  * eval: (c-set-style "gnu")
1812  * End:
1813  */