ip-neighbor: add set ip neighbor-config CLI command
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
 * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/ip/ip46_address.h>
27 #include <vnet/fib/fib_table.h>
28 #include <vnet/adj/adj_mcast.h>
29
/** Pool for All IP neighbors; neighbors are referred to elsewhere by
 * their index in this pool */
static ip_neighbor_t *ip_neighbor_pool;

/** protocol specific lists of time sorted neighbors.
 * Each entry is the index, in ip_neighbor_elt_pool, of the element that
 * serves as the head of that address family's list. */
index_t ip_neighbor_list_head[N_AF];
35
/** One element of a time sorted neighbor list */
typedef struct ip_neighbor_elt_t_
{
  /** linkage into the list */
  clib_llist_anchor_t ipne_anchor;
  /** index of the neighbor (in ip_neighbor_pool) this element stands for */
  index_t ipne_index;
} ip_neighbor_elt_t;

/** Pool of linked list elements */
ip_neighbor_elt_t *ip_neighbor_elt_pool;
44
/** Per address-family neighbor database and its configuration */
typedef struct ip_neighbor_db_t_
{
  /** per interface hash; vector indexed by sw_if_index, each hash maps
   * a neighbor key to the neighbor's pool index */
  uword **ipndb_hash;
  /** per-protocol limit - max number of neighbors. 0 disables the check */
  u32 ipndb_limit;
  /** max age of a neighbor before it's forcibly evicted.
   * 0 is the default and is described as "not aging" where the DBs are
   * initialised; NOTE(review): units are not visible here - confirm */
  u32 ipndb_age;
  /** when the limit is reached and new neighbors are created, should
   * we recycle an old one */
  bool ipndb_recycle;
  /** per-protocol number of elements */
  u32 ipndb_n_elts;
  /** per-protocol number of elements per-fib-index; only neighbors with
   * an adj-fib in a regular (non link-local) FIB table are counted */
  u32 *ipndb_n_elts_per_fib;
} ip_neighbor_db_t;
61
/** Log class used by the IP_NEIGHBOR_DBG / IP_NEIGHBOR_INFO macros */
static vlib_log_class_t ipn_logger;

/* DBs of neighbours one per AF.
 * Both families start with the same settings: a limit of 50000 entries,
 * no aging and no recycling. */
/* *INDENT-OFF* */
static ip_neighbor_db_t ip_neighbor_db[N_AF] = {
  [AF_IP4] = {
    .ipndb_limit = 50000,
    /* Default to not aging and not recycling */
    .ipndb_age = 0,
    .ipndb_recycle = false,
  },
  [AF_IP6] = {
    .ipndb_limit = 50000,
    /* Default to not aging and not recycling */
    .ipndb_age = 0,
    .ipndb_recycle = false,
  }
};
/* *INDENT-ON* */
81
/**
 * Log to the ip-neighbor log class at debug level.
 * The macro does not supply a trailing ';' - the caller terminates the
 * statement, so a use is safe as the body of an unbraced if/else.
 */
#define IP_NEIGHBOR_DBG(...)                           \
    vlib_log_debug (ipn_logger, __VA_ARGS__)

/**
 * Log to the ip-neighbor log class at notice level.
 * As above, no trailing ';' is supplied by the macro.
 */
#define IP_NEIGHBOR_INFO(...)                          \
    vlib_log_notice (ipn_logger, __VA_ARGS__)
87
88 ip_neighbor_t *
89 ip_neighbor_get (index_t ipni)
90 {
91   if (pool_is_free_index (ip_neighbor_pool, ipni))
92     return (NULL);
93
94   return (pool_elt_at_index (ip_neighbor_pool, ipni));
95 }
96
97 static index_t
98 ip_neighbor_get_index (const ip_neighbor_t * ipn)
99 {
100   return (ipn - ip_neighbor_pool);
101 }
102
103 static void
104 ip_neighbor_touch (ip_neighbor_t * ipn)
105 {
106   ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_STALE;
107 }
108
109 static bool
110 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
111 {
112   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
113 }
114
115 const ip_address_t *
116 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
117 {
118   return (&ipn->ipn_key->ipnk_ip);
119 }
120
121 ip_address_family_t
122 ip_neighbor_get_af (const ip_neighbor_t * ipn)
123 {
124   return (ip_addr_version (&ipn->ipn_key->ipnk_ip));
125 }
126
127 const mac_address_t *
128 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
129 {
130   return (&ipn->ipn_mac);
131 }
132
133 const u32
134 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
135 {
136   return (ipn->ipn_key->ipnk_sw_if_index);
137 }
138
139 static void
140 ip_neighbor_list_remove (ip_neighbor_t * ipn)
141 {
142   /* new neighbours, are added to the head of the list, since the
143    * list is time sorted, newest first */
144   ip_neighbor_elt_t *elt;
145
146   if (~0 != ipn->ipn_elt)
147     {
148       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
149
150       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
151
152       ipn->ipn_elt = ~0;
153     }
154 }
155
156 static void
157 ip_neighbor_refresh (ip_neighbor_t * ipn)
158 {
159   /* new neighbours, are added to the head of the list, since the
160    * list is time sorted, newest first */
161   ip_neighbor_elt_t *elt, *head;
162
163   ip_neighbor_touch (ipn);
164   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
165   ipn->ipn_n_probes = 0;
166
167   if (ip_neighbor_is_dynamic (ipn))
168     {
169       if (~0 == ipn->ipn_elt)
170         /* first time insertion */
171         pool_get_zero (ip_neighbor_elt_pool, elt);
172       else
173         {
174           /* already inserted - extract first */
175           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
176
177           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
178         }
179       head = pool_elt_at_index (ip_neighbor_elt_pool,
180                                 ip_neighbor_list_head[ip_neighbor_get_af
181                                                       (ipn)]);
182
183       elt->ipne_index = ip_neighbor_get_index (ipn);
184       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
185       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
186     }
187 }
188
189 static void
190 ip_neighbor_db_add (const ip_neighbor_t * ipn)
191 {
192   ip_address_family_t af;
193   u32 sw_if_index;
194
195   af = ip_neighbor_get_af (ipn);
196   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
197
198   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
199
200   if (!ip_neighbor_db[af].ipndb_hash[sw_if_index])
201     ip_neighbor_db[af].ipndb_hash[sw_if_index]
202       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
203
204   hash_set_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index],
205                 ipn->ipn_key, ip_neighbor_get_index (ipn));
206
207   ip_neighbor_db[af].ipndb_n_elts++;
208 }
209
210 static void
211 ip_neighbor_db_remove (const ip_neighbor_t * ipn)
212 {
213   ip_address_family_t af;
214   u32 sw_if_index;
215
216   af = ip_neighbor_get_af (ipn);
217   sw_if_index = ipn->ipn_key->ipnk_sw_if_index;
218
219   vec_validate (ip_neighbor_db[af].ipndb_hash, sw_if_index);
220
221   hash_unset_mem (ip_neighbor_db[af].ipndb_hash[sw_if_index], ipn->ipn_key);
222
223   ip_neighbor_db[af].ipndb_n_elts--;
224 }
225
226 static ip_neighbor_t *
227 ip_neighbor_db_find (const ip_neighbor_key_t * key)
228 {
229   ip_address_family_t af;
230   uword *p;
231
232   af = ip_addr_version (&key->ipnk_ip);
233
234   if (key->ipnk_sw_if_index >= vec_len (ip_neighbor_db[af].ipndb_hash))
235     return NULL;
236
237   p = hash_get_mem (ip_neighbor_db[af].ipndb_hash
238                     [key->ipnk_sw_if_index], key);
239
240   if (p)
241     return ip_neighbor_get (p[0]);
242
243   return (NULL);
244 }
245
246 static u8
247 ip_af_type_pfx_len (ip_address_family_t type)
248 {
249   return (type == AF_IP4 ? 32 : 128);
250 }
251
252 static void
253 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
254 {
255   ip_address_family_t af;
256
257   af = ip_neighbor_get_af (ipn);
258
259   if (af == AF_IP6 &&
260       ip6_address_is_link_local_unicast (&ip_addr_v6
261                                          (&ipn->ipn_key->ipnk_ip)))
262     {
263       ip6_ll_prefix_t pfx = {
264         .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
265         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
266       };
267       ipn->ipn_fib_entry_index =
268         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
269     }
270   else
271     {
272       fib_protocol_t fproto;
273
274       fproto = ip_address_family_to_fib_proto (af);
275
276       fib_prefix_t pfx = {
277         .fp_len = ip_af_type_pfx_len (af),
278         .fp_proto = fproto,
279         .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
280       };
281
282       ipn->ipn_fib_entry_index =
283         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
284                                   FIB_ENTRY_FLAG_ATTACHED,
285                                   fib_proto_to_dpo (fproto),
286                                   &pfx.fp_addr,
287                                   ipn->ipn_key->ipnk_sw_if_index,
288                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
289
290       vec_validate (ip_neighbor_db[af].ipndb_n_elts_per_fib, fib_index);
291
292       ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]++;
293
294       if (1 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
295         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
296     }
297 }
298
299 static void
300 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
301 {
302   ip_address_family_t af;
303
304   af = ip_neighbor_get_af (ipn);
305
306   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
307     {
308       if (AF_IP6 == af &&
309           ip6_address_is_link_local_unicast (&ip_addr_v6
310                                              (&ipn->ipn_key->ipnk_ip)))
311         {
312           ip6_ll_prefix_t pfx = {
313             .ilp_addr = ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
314             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
315           };
316           ip6_ll_table_entry_delete (&pfx);
317         }
318       else
319         {
320           fib_protocol_t fproto;
321
322           fproto = ip_address_family_to_fib_proto (af);
323
324           fib_prefix_t pfx = {
325             .fp_len = ip_af_type_pfx_len (af),
326             .fp_proto = fproto,
327             .fp_addr = ip_addr_46 (&ipn->ipn_key->ipnk_ip),
328           };
329
330           fib_table_entry_path_remove (fib_index,
331                                        &pfx,
332                                        FIB_SOURCE_ADJ,
333                                        fib_proto_to_dpo (fproto),
334                                        &pfx.fp_addr,
335                                        ipn->ipn_key->ipnk_sw_if_index,
336                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
337
338           ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index]--;
339
340           if (0 == ip_neighbor_db[af].ipndb_n_elts_per_fib[fib_index])
341             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
342         }
343     }
344 }
345
346 static void
347 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
348 {
349   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
350                           ethernet_build_rewrite (vnet_get_main (),
351                                                   ipn->
352                                                   ipn_key->ipnk_sw_if_index,
353                                                   adj_get_link_type (ai),
354                                                   ipn->ipn_mac.bytes));
355 }
356
357 static void
358 ip_neighbor_mk_incomplete (adj_index_t ai)
359 {
360   ip_adjacency_t *adj = adj_get (ai);
361
362   adj_nbr_update_rewrite (ai,
363                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
364                           ethernet_build_rewrite (vnet_get_main (),
365                                                   adj->
366                                                   rewrite_header.sw_if_index,
367                                                   VNET_LINK_ARP,
368                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
369 }
370
371 static adj_walk_rc_t
372 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
373 {
374   ip_neighbor_t *ipn = ctx;
375
376   ip_neighbor_mk_complete (ai, ipn);
377
378   return (ADJ_WALK_RC_CONTINUE);
379 }
380
381 static adj_walk_rc_t
382 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
383 {
384   ip_neighbor_mk_incomplete (ai);
385
386   return (ADJ_WALK_RC_CONTINUE);
387 }
388
389 static void
390 ip_neighbor_destroy (ip_neighbor_t * ipn)
391 {
392   ip_address_family_t af;
393
394   af = ip_neighbor_get_af (ipn);
395
396   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
397                    ip_neighbor_get_index (ipn));
398
399   ip_neighbor_publish (ip_neighbor_get_index (ipn),
400                        IP_NEIGHBOR_EVENT_REMOVED);
401
402   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
403                    ip_address_family_to_fib_proto (af),
404                    &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
405                    ip_neighbor_mk_incomplete_walk, ipn);
406   ip_neighbor_adj_fib_remove
407     (ipn,
408      fib_table_get_index_for_sw_if_index
409      (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
410
411   ip_neighbor_list_remove (ipn);
412   ip_neighbor_db_remove (ipn);
413   clib_mem_free (ipn->ipn_key);
414
415   pool_put (ip_neighbor_pool, ipn);
416 }
417
418 static bool
419 ip_neighbor_force_reuse (ip_address_family_t af)
420 {
421   if (!ip_neighbor_db[af].ipndb_recycle)
422     return false;
423
424   /* pluck the oldest entry, which is the one from the end of the list */
425   ip_neighbor_elt_t *elt, *head;
426
427   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
428
429   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
430     return (false);
431
432   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
433   ip_neighbor_destroy (ip_neighbor_get (elt->ipne_index));
434
435   return (true);
436 }
437
438 static ip_neighbor_t *
439 ip_neighbor_alloc (const ip_neighbor_key_t * key,
440                    const mac_address_t * mac, ip_neighbor_flags_t flags)
441 {
442   ip_address_family_t af;
443   ip_neighbor_t *ipn;
444
445   af = ip_addr_version (&key->ipnk_ip);
446
447   if (ip_neighbor_db[af].ipndb_limit &&
448       (ip_neighbor_db[af].ipndb_n_elts >= ip_neighbor_db[af].ipndb_limit))
449     {
450       if (!ip_neighbor_force_reuse (af))
451         return (NULL);
452     }
453
454   pool_get_zero (ip_neighbor_pool, ipn);
455
456   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
457   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
458
459   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
460   ipn->ipn_flags = flags;
461   ipn->ipn_elt = ~0;
462
463   mac_address_copy (&ipn->ipn_mac, mac);
464
465   ip_neighbor_db_add (ipn);
466
467   /* create the adj-fib. the entry in the FIB table for the peer's interface */
468   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
469     ip_neighbor_adj_fib_add
470       (ipn, fib_table_get_index_for_sw_if_index
471        (ip_address_family_to_fib_proto (af), ipn->ipn_key->ipnk_sw_if_index));
472
473   return (ipn);
474 }
475
476 int
477 ip_neighbor_add (const ip_address_t * ip,
478                  const mac_address_t * mac,
479                  u32 sw_if_index,
480                  ip_neighbor_flags_t flags, u32 * stats_index)
481 {
482   fib_protocol_t fproto;
483   ip_neighbor_t *ipn;
484
485   /* main thread only */
486   ASSERT (0 == vlib_get_thread_index ());
487
488   fproto = ip_address_family_to_fib_proto (ip_addr_version (ip));
489
490   const ip_neighbor_key_t key = {
491     .ipnk_ip = *ip,
492     .ipnk_sw_if_index = sw_if_index,
493   };
494
495   ipn = ip_neighbor_db_find (&key);
496
497   if (ipn)
498     {
499       IP_NEIGHBOR_DBG ("update: %U, %U",
500                        format_vnet_sw_if_index_name, vnet_get_main (),
501                        sw_if_index, format_ip_address, ip,
502                        format_ip_neighbor_flags, flags, format_mac_address_t,
503                        mac);
504
505       ip_neighbor_touch (ipn);
506
507       /* Refuse to over-write static neighbor entry. */
508       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
509           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
510         {
511           /* if MAC address match, still check to send event */
512           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
513             goto check_customers;
514           return -2;
515         }
516
517       /* A dynamic entry can become static, but not vice-versa.
518        * i.e. since if it was programmed by the CP then it must
519        * be removed by the CP */
520       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
521           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
522         {
523           ip_neighbor_list_remove (ipn);
524           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
525           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
526         }
527
528       /*
529        * prevent a DoS attack from the data-plane that
530        * spams us with no-op updates to the MAC address
531        */
532       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
533         {
534           ip_neighbor_refresh (ipn);
535           goto check_customers;
536         }
537
538       mac_address_copy (&ipn->ipn_mac, mac);
539     }
540   else
541     {
542       IP_NEIGHBOR_INFO ("add: %U, %U",
543                         format_vnet_sw_if_index_name, vnet_get_main (),
544                         sw_if_index, format_ip_address, ip,
545                         format_ip_neighbor_flags, flags, format_mac_address_t,
546                         mac);
547
548       ipn = ip_neighbor_alloc (&key, mac, flags);
549
550       if (NULL == ipn)
551         return VNET_API_ERROR_LIMIT_EXCEEDED;
552     }
553
554   /* Update time stamp and flags. */
555   ip_neighbor_refresh (ipn);
556
557   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
558                    fproto, &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
559                    ip_neighbor_mk_complete_walk, ipn);
560
561 check_customers:
562   /* Customer(s) requesting event for this address? */
563   ip_neighbor_publish (ip_neighbor_get_index (ipn), IP_NEIGHBOR_EVENT_ADDED);
564
565   if (stats_index)
566     *stats_index = adj_nbr_find (fproto,
567                                  fib_proto_to_link (fproto),
568                                  &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
569                                  ipn->ipn_key->ipnk_sw_if_index);
570   return 0;
571 }
572
573 int
574 ip_neighbor_del (const ip_address_t * ip, u32 sw_if_index)
575 {
576   ip_neighbor_t *ipn;
577
578   /* main thread only */
579   ASSERT (0 == vlib_get_thread_index ());
580
581   IP_NEIGHBOR_INFO ("delete: %U, %U",
582                     format_vnet_sw_if_index_name, vnet_get_main (),
583                     sw_if_index, format_ip_address, ip);
584
585   const ip_neighbor_key_t key = {
586     .ipnk_ip = *ip,
587     .ipnk_sw_if_index = sw_if_index,
588   };
589
590   ipn = ip_neighbor_db_find (&key);
591
592   if (NULL == ipn)
593     return (VNET_API_ERROR_NO_SUCH_ENTRY);
594
595   ip_neighbor_destroy (ipn);
596
597   return (0);
598 }
599
/** Context handed to the walk that collects neighbors for deletion */
typedef struct ip_neighbor_del_all_ctx_t_
{
  /** vector of the pool indices of the neighbors to delete */
  index_t *ipn_del;
} ip_neighbor_del_all_ctx_t;
604
605 static walk_rc_t
606 ip_neighbor_del_all_walk_cb (index_t ipni, void *arg)
607 {
608   ip_neighbor_del_all_ctx_t *ctx = arg;
609
610   vec_add1 (ctx->ipn_del, ipni);
611
612   return (WALK_CONTINUE);
613 }
614
615 void
616 ip_neighbor_del_all (ip_address_family_t af, u32 sw_if_index)
617 {
618   IP_NEIGHBOR_INFO ("delete-all: %U, %U",
619                     format_ip_address_family, af,
620                     format_vnet_sw_if_index_name, vnet_get_main (),
621                     sw_if_index);
622
623   ip_neighbor_del_all_ctx_t ctx = {
624     .ipn_del = NULL,
625   };
626   index_t *ipni;
627
628   ip_neighbor_walk (af, sw_if_index, ip_neighbor_del_all_walk_cb, &ctx);
629
630   vec_foreach (ipni,
631                ctx.ipn_del) ip_neighbor_destroy (ip_neighbor_get (*ipni));
632   vec_free (ctx.ipn_del);
633 }
634
635 void
636 ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
637 {
638   ip_neighbor_t *ipn;
639   ip_adjacency_t *adj;
640
641   adj = adj_get (ai);
642
643   ip_neighbor_key_t key = {
644     .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
645   };
646
647   ip_address_from_46 (&adj->sub_type.nbr.next_hop,
648                       adj->ia_nh_proto, &key.ipnk_ip);
649
650   ipn = ip_neighbor_db_find (&key);
651
652   switch (adj->lookup_next_index)
653     {
654     case IP_LOOKUP_NEXT_ARP:
655       if (NULL != ipn)
656         {
657           adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
658                            adj->ia_nh_proto,
659                            &adj->sub_type.nbr.next_hop,
660                            ip_neighbor_mk_complete_walk, ipn);
661         }
662       else
663         {
664           /*
665            * no matching ARP entry.
666            * construct the rewrite required to for an ARP packet, and stick
667            * that in the adj's pipe to smoke.
668            */
669           adj_nbr_update_rewrite
670             (ai,
671              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
672              ethernet_build_rewrite
673              (vnm,
674               adj->rewrite_header.sw_if_index,
675               VNET_LINK_ARP,
676               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
677
678           /*
679            * since the FIB has added this adj for a route, it makes sense it
680            * may want to forward traffic sometime soon. Let's send a
681            * speculative ARP. just one. If we were to do periodically that
682            * wouldn't be bad either, but that's more code than i'm prepared to
683            * write at this time for relatively little reward.
684            */
685           /*
686            * adj_nbr_update_rewrite may actually call fib_walk_sync.
687            * fib_walk_sync may allocate a new adjacency and potentially cause
688            * a realloc for adj_pool. When that happens, adj pointer is no
689            * longer valid here.x We refresh adj pointer accordingly.
690            */
691           adj = adj_get (ai);
692           ip_neighbor_probe (adj);
693         }
694       break;
695     case IP_LOOKUP_NEXT_GLEAN:
696     case IP_LOOKUP_NEXT_BCAST:
697     case IP_LOOKUP_NEXT_MCAST:
698     case IP_LOOKUP_NEXT_DROP:
699     case IP_LOOKUP_NEXT_PUNT:
700     case IP_LOOKUP_NEXT_LOCAL:
701     case IP_LOOKUP_NEXT_REWRITE:
702     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
703     case IP_LOOKUP_NEXT_MIDCHAIN:
704     case IP_LOOKUP_NEXT_ICMP_ERROR:
705     case IP_LOOKUP_N_NEXT:
706       ASSERT (0);
707       break;
708     }
709 }
710
711 void
712 ip_neighbor_learn (const ip_neighbor_learn_t * l)
713 {
714   ip_neighbor_add (&l->ip, &l->mac, l->sw_if_index,
715                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
716 }
717
718 static clib_error_t *
719 ip_neighbor_cmd (vlib_main_t * vm,
720                  unformat_input_t * input, vlib_cli_command_t * cmd)
721 {
722   ip_address_t ip = IP_ADDRESS_V6_ALL_0S;
723   mac_address_t mac = ZERO_MAC_ADDRESS;
724   vnet_main_t *vnm = vnet_get_main ();
725   ip_neighbor_flags_t flags;
726   u32 sw_if_index = ~0;
727   int is_add = 1;
728   int count = 1;
729
730   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
731
732   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
733     {
734       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
735       if (unformat (input, "%U %U %U",
736                     unformat_vnet_sw_interface, vnm, &sw_if_index,
737                     unformat_ip_address, &ip, unformat_mac_address_t, &mac))
738         ;
739       else if (unformat (input, "delete") || unformat (input, "del"))
740         is_add = 0;
741       else if (unformat (input, "static"))
742         {
743           flags |= IP_NEIGHBOR_FLAG_STATIC;
744           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
745         }
746       else if (unformat (input, "no-fib-entry"))
747         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
748       else if (unformat (input, "count %d", &count))
749         ;
750       else
751         break;
752     }
753
754   if (sw_if_index == ~0 ||
755       ip_address_is_zero (&ip) || mac_address_is_zero (&mac))
756     return clib_error_return (0,
757                               "specify interface, IP address and MAC: `%U'",
758                               format_unformat_error, input);
759
760   while (count)
761     {
762       if (is_add)
763         ip_neighbor_add (&ip, &mac, sw_if_index, flags, NULL);
764       else
765         ip_neighbor_del (&ip, sw_if_index);
766
767       ip_address_increment (&ip);
768       mac_address_increment (&mac);
769
770       --count;
771     }
772
773   return NULL;
774 }
775
776 /* *INDENT-OFF* */
777 /*?
778  * Add or delete IPv4 ARP cache entries.
779  *
780  * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
781  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
782  * any order and combination.
783  *
784  * @cliexpar
785  * @parblock
786  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
787  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
788  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
789  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
790  *
791  * To add or delete an IPv4 ARP cache entry to or from a specific fib
792  * table:
793  * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
794  * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
795  *
796  * Add or delete IPv4 static ARP cache entries as follows:
797  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
798  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
799  *
800  * For testing / debugging purposes, the 'set ip neighbor' command can add or
801  * delete multiple entries. Supply the 'count N' parameter:
802  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
803  * @endparblock
804  ?*/
805 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
806   .path = "set ip neighbor",
807   .short_help =
808   "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
809   .function = ip_neighbor_cmd,
810 };
811 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
812   .path = "ip neighbor",
813   .short_help =
814   "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
815   .function = ip_neighbor_cmd,
816 };
817 /* *INDENT-ON* */
818
819 static int
820 ip_neighbor_sort (void *a1, void *a2)
821 {
822   index_t *ipni1 = a1, *ipni2 = a2;
823   ip_neighbor_t *ipn1, *ipn2;
824   int cmp;
825
826   ipn1 = ip_neighbor_get (*ipni1);
827   ipn2 = ip_neighbor_get (*ipni2);
828
829   cmp = vnet_sw_interface_compare (vnet_get_main (),
830                                    ipn1->ipn_key->ipnk_sw_if_index,
831                                    ipn2->ipn_key->ipnk_sw_if_index);
832   if (!cmp)
833     cmp = ip_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
834   return cmp;
835 }
836
837 static index_t *
838 ip_neighbor_entries (u32 sw_if_index, ip_address_family_t af)
839 {
840   index_t *ipnis = NULL;
841   ip_neighbor_t *ipn;
842
843   /* *INDENT-OFF* */
844   pool_foreach (ipn, ip_neighbor_pool)
845    {
846     if ((sw_if_index == ~0 ||
847         ipn->ipn_key->ipnk_sw_if_index == sw_if_index) &&
848         (N_AF == af ||
849          ip_neighbor_get_af(ipn) == af))
850        vec_add1 (ipnis, ip_neighbor_get_index(ipn));
851   }
852
853   /* *INDENT-ON* */
854
855   if (ipnis)
856     vec_sort_with_function (ipnis, ip_neighbor_sort);
857   return ipnis;
858 }
859
860 static clib_error_t *
861 ip_neighbor_show_sorted_i (vlib_main_t * vm,
862                            unformat_input_t * input,
863                            vlib_cli_command_t * cmd, ip_address_family_t af)
864 {
865   ip_neighbor_elt_t *elt, *head;
866
867   head = pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[af]);
868
869
870   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
871                    "Flags", "Ethernet", "Interface");
872
873   /* *INDENT-OFF*/
874   /* the list is time sorted, newest first, so start from the back
875    * and work forwards. Stop when we get to one that is alive */
876   clib_llist_foreach_reverse(ip_neighbor_elt_pool,
877                              ipne_anchor, head, elt,
878   ({
879     vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
880   }));
881   /* *INDENT-ON*/
882
883   return (NULL);
884 }
885
886 static clib_error_t *
887 ip_neighbor_show_i (vlib_main_t * vm,
888                     unformat_input_t * input,
889                     vlib_cli_command_t * cmd, ip_address_family_t af)
890 {
891   index_t *ipni, *ipnis = NULL;
892   u32 sw_if_index;
893
894   /* Filter entries by interface if given. */
895   sw_if_index = ~0;
896   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
897                         &sw_if_index);
898
899   ipnis = ip_neighbor_entries (sw_if_index, af);
900
901   if (ipnis)
902     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
903                      "Flags", "Ethernet", "Interface");
904
905   vec_foreach (ipni, ipnis)
906   {
907     vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
908   }
909   vec_free (ipnis);
910
911   return (NULL);
912 }
913
914 static clib_error_t *
915 ip_neighbor_show (vlib_main_t * vm,
916                   unformat_input_t * input, vlib_cli_command_t * cmd)
917 {
918   return (ip_neighbor_show_i (vm, input, cmd, N_AF));
919 }
920
921 static clib_error_t *
922 ip6_neighbor_show (vlib_main_t * vm,
923                    unformat_input_t * input, vlib_cli_command_t * cmd)
924 {
925   return (ip_neighbor_show_i (vm, input, cmd, AF_IP6));
926 }
927
928 static clib_error_t *
929 ip4_neighbor_show (vlib_main_t * vm,
930                    unformat_input_t * input, vlib_cli_command_t * cmd)
931 {
932   return (ip_neighbor_show_i (vm, input, cmd, AF_IP4));
933 }
934
935 static clib_error_t *
936 ip6_neighbor_show_sorted (vlib_main_t * vm,
937                           unformat_input_t * input, vlib_cli_command_t * cmd)
938 {
939   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP6));
940 }
941
942 static clib_error_t *
943 ip4_neighbor_show_sorted (vlib_main_t * vm,
944                           unformat_input_t * input, vlib_cli_command_t * cmd)
945 {
946   return (ip_neighbor_show_sorted_i (vm, input, cmd, AF_IP4));
947 }
948
949 /*?
950  * Display all the IP neighbor entries.
951  *
952  * @cliexpar
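 * Example of how to display the IP neighbor table. The sample output is
 * illustrative only; the command prints the columns Time, IP, Flags,
 * Ethernet and Interface: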
954  * @cliexstart{show ip neighbor}
955  *    Time      FIB        IP4       Flags      Ethernet              Interface
956  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
957  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
958  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
959  * Proxy arps enabled for:
960  * Fib_index 0   6.0.0.1 - 6.0.0.11
961  * @cliexend
962  ?*/
963 /* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
  .path = "show ip neighbors",
  .function = ip_neighbor_show,
  .short_help = "show ip neighbors [interface]",
};
/* per address-family variants of the plural-form command */
VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
  .path = "show ip4 neighbors",
  .function = ip4_neighbor_show,
  .short_help = "show ip4 neighbors [interface]",
};
VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
  .path = "show ip6 neighbors",
  .function = ip6_neighbor_show,
  .short_help = "show ip6 neighbors [interface]",
};
/* singular-form paths; these use the same handlers as the plural forms */
VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
  .path = "show ip neighbor",
  .function = ip_neighbor_show,
  .short_help = "show ip neighbor [interface]",
};
VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
  .path = "show ip4 neighbor",
  .function = ip4_neighbor_show,
  .short_help = "show ip4 neighbor [interface]",
};
VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
  .path = "show ip6 neighbor",
  .function = ip6_neighbor_show,
  .short_help = "show ip6 neighbor [interface]",
};
/* 'neighbor-sorted' variants, one per address family */
VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
  .path = "show ip4 neighbor-sorted",
  .function = ip4_neighbor_show_sorted,
  .short_help = "show ip4 neighbor-sorted",
};
VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
  .path = "show ip6 neighbor-sorted",
  .function = ip6_neighbor_show_sorted,
  .short_help = "show ip6 neighbor-sorted",
};
1004 /* *INDENT-ON* */
1005
/** Per address-family table of registered protocol-specific functions;
 * filled in by ip_neighbor_register() */
static ip_neighbor_vft_t ip_nbr_vfts[N_AF];
1007
1008 void
1009 ip_neighbor_register (ip_address_family_t af, const ip_neighbor_vft_t * vft)
1010 {
1011   ip_nbr_vfts[af] = *vft;
1012 }
1013
1014 void
1015 ip_neighbor_probe_dst (u32 sw_if_index,
1016                        ip_address_family_t af, const ip46_address_t * dst)
1017 {
1018   if (!vnet_sw_interface_is_admin_up (vnet_get_main (), sw_if_index))
1019     return;
1020
1021   switch (af)
1022     {
1023     case AF_IP6:
1024       ip6_neighbor_probe_dst (sw_if_index, &dst->ip6);
1025       break;
1026     case AF_IP4:
1027       ip4_neighbor_probe_dst (sw_if_index, &dst->ip4);
1028       break;
1029     }
1030 }
1031
1032 void
1033 ip_neighbor_probe (const ip_adjacency_t * adj)
1034 {
1035   ip_neighbor_probe_dst (adj->rewrite_header.sw_if_index,
1036                          ip_address_family_from_fib_proto (adj->ia_nh_proto),
1037                          &adj->sub_type.nbr.next_hop);
1038 }
1039
1040 void
1041 ip_neighbor_walk (ip_address_family_t af,
1042                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
1043 {
1044   ip_neighbor_key_t *key;
1045   index_t ipni;
1046
1047   if (~0 == sw_if_index)
1048     {
1049       uword **hash;
1050
1051       vec_foreach (hash, ip_neighbor_db[af].ipndb_hash)
1052       {
1053           /* *INDENT-OFF* */
1054           hash_foreach (key, ipni, *hash,
1055           ({
1056             if (WALK_STOP == cb (ipni, ctx))
1057               break;
1058           }));
1059           /* *INDENT-ON* */
1060       }
1061     }
1062   else
1063     {
1064       uword *hash;
1065
1066       if (vec_len (ip_neighbor_db[af].ipndb_hash) <= sw_if_index)
1067         return;
1068       hash = ip_neighbor_db[af].ipndb_hash[sw_if_index];
1069
1070       /* *INDENT-OFF* */
1071       hash_foreach (key, ipni, hash,
1072       ({
1073         if (WALK_STOP == cb (ipni, ctx))
1074           break;
1075       }));
1076       /* *INDENT-ON* */
1077     }
1078 }
1079
1080 int
1081 ip4_neighbor_proxy_add (u32 fib_index,
1082                         const ip4_address_t * start,
1083                         const ip4_address_t * end)
1084 {
1085   if (ip_nbr_vfts[AF_IP4].inv_proxy4_add)
1086     {
1087       return (ip_nbr_vfts[AF_IP4].inv_proxy4_add (fib_index, start, end));
1088     }
1089
1090   return (-1);
1091 }
1092
1093 int
1094 ip4_neighbor_proxy_delete (u32 fib_index,
1095                            const ip4_address_t * start,
1096                            const ip4_address_t * end)
1097 {
1098   if (ip_nbr_vfts[AF_IP4].inv_proxy4_del)
1099     {
1100       return (ip_nbr_vfts[AF_IP4].inv_proxy4_del (fib_index, start, end));
1101     }
1102   return -1;
1103 }
1104
1105 int
1106 ip4_neighbor_proxy_enable (u32 sw_if_index)
1107 {
1108   if (ip_nbr_vfts[AF_IP4].inv_proxy4_enable)
1109     {
1110       return (ip_nbr_vfts[AF_IP4].inv_proxy4_enable (sw_if_index));
1111     }
1112   return -1;
1113 }
1114
1115 int
1116 ip4_neighbor_proxy_disable (u32 sw_if_index)
1117 {
1118   if (ip_nbr_vfts[AF_IP4].inv_proxy4_disable)
1119     {
1120       return (ip_nbr_vfts[AF_IP4].inv_proxy4_disable (sw_if_index));
1121     }
1122   return -1;
1123 }
1124
1125 int
1126 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1127 {
1128   if (ip_nbr_vfts[AF_IP6].inv_proxy6_add)
1129     {
1130       return (ip_nbr_vfts[AF_IP6].inv_proxy6_add (sw_if_index, addr));
1131     }
1132   return -1;
1133 }
1134
1135 int
1136 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1137 {
1138   if (ip_nbr_vfts[AF_IP6].inv_proxy6_del)
1139     {
1140       return (ip_nbr_vfts[AF_IP6].inv_proxy6_del (sw_if_index, addr));
1141     }
1142   return -1;
1143 }
1144
1145 static void
1146 ip_neighbor_ethernet_change_mac (ethernet_main_t * em,
1147                                  u32 sw_if_index, uword opaque)
1148 {
1149   ip_neighbor_t *ipn;
1150
1151   IP_NEIGHBOR_DBG ("mac-change: %U",
1152                    format_vnet_sw_if_index_name, vnet_get_main (),
1153                    sw_if_index);
1154
1155   /* *INDENT-OFF* */
1156   pool_foreach (ipn, ip_neighbor_pool)
1157    {
1158     if (ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1159       adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1160                        ip_address_family_to_fib_proto(ip_neighbor_get_af(ipn)),
1161                        &ip_addr_46(&ipn->ipn_key->ipnk_ip),
1162                        ip_neighbor_mk_complete_walk,
1163                        ipn);
1164   }
1165   /* *INDENT-ON* */
1166
1167   adj_glean_update_rewrite_itf (sw_if_index);
1168 }
1169
1170 void
1171 ip_neighbor_populate (ip_address_family_t af, u32 sw_if_index)
1172 {
1173   index_t *ipnis = NULL, *ipni;
1174   ip_neighbor_t *ipn;
1175
1176   IP_NEIGHBOR_DBG ("populate: %U %U",
1177                    format_vnet_sw_if_index_name, vnet_get_main (),
1178                    sw_if_index, format_ip_address_family, af);
1179
1180   /* *INDENT-OFF* */
1181   pool_foreach (ipn, ip_neighbor_pool)
1182    {
1183     if (ip_neighbor_get_af(ipn) == af &&
1184         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1185       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1186   }
1187   /* *INDENT-ON* */
1188
1189   vec_foreach (ipni, ipnis)
1190   {
1191     ipn = ip_neighbor_get (*ipni);
1192
1193     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1194                      ip_address_family_to_fib_proto (ip_neighbor_get_af
1195                                                      (ipn)),
1196                      &ip_addr_46 (&ipn->ipn_key->ipnk_ip),
1197                      ip_neighbor_mk_complete_walk, ipn);
1198   }
1199   vec_free (ipnis);
1200 }
1201
1202 void
1203 ip_neighbor_flush (ip_address_family_t af, u32 sw_if_index)
1204 {
1205   index_t *ipnis = NULL, *ipni;
1206   ip_neighbor_t *ipn;
1207
1208
1209   IP_NEIGHBOR_DBG ("flush: %U %U",
1210                    format_vnet_sw_if_index_name, vnet_get_main (),
1211                    sw_if_index, format_ip_address_family, af);
1212
1213   /* *INDENT-OFF* */
1214   pool_foreach (ipn, ip_neighbor_pool)
1215    {
1216     if (ip_neighbor_get_af(ipn) == af &&
1217         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1218         ip_neighbor_is_dynamic (ipn))
1219       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1220   }
1221   /* *INDENT-ON* */
1222
1223   vec_foreach (ipni, ipnis) ip_neighbor_destroy (ip_neighbor_get (*ipni));
1224   vec_free (ipnis);
1225 }
1226
1227 static walk_rc_t
1228 ip_neighbor_mark_one (index_t ipni, void *ctx)
1229 {
1230   ip_neighbor_t *ipn;
1231
1232   ipn = ip_neighbor_get (ipni);
1233
1234   ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STALE;
1235
1236   return (WALK_CONTINUE);
1237 }
1238
1239 void
1240 ip_neighbor_mark (ip_address_family_t af)
1241 {
1242   ip_neighbor_walk (af, ~0, ip_neighbor_mark_one, NULL);
1243 }
1244
/**
 * Context handed to ip_neighbor_sweep_one() during a sweep walk.
 */
typedef struct ip_neighbor_sweep_ctx_t_
{
  /** vector of indices of the neighbors found with the STALE flag set */
  index_t *ipnsc_stale;
} ip_neighbor_sweep_ctx_t;
1249
1250 static walk_rc_t
1251 ip_neighbor_sweep_one (index_t ipni, void *arg)
1252 {
1253   ip_neighbor_sweep_ctx_t *ctx = arg;
1254   ip_neighbor_t *ipn;
1255
1256   ipn = ip_neighbor_get (ipni);
1257
1258   if (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STALE)
1259     {
1260       vec_add1 (ctx->ipnsc_stale, ipni);
1261     }
1262
1263   return (WALK_CONTINUE);
1264 }
1265
1266 void
1267 ip_neighbor_sweep (ip_address_family_t af)
1268 {
1269   ip_neighbor_sweep_ctx_t ctx = { };
1270   index_t *ipni;
1271
1272   ip_neighbor_walk (af, ~0, ip_neighbor_sweep_one, &ctx);
1273
1274   vec_foreach (ipni, ctx.ipnsc_stale)
1275   {
1276     ip_neighbor_destroy (ip_neighbor_get (*ipni));
1277   }
1278   vec_free (ctx.ipnsc_stale);
1279 }
1280
1281 /*
1282  * Remove any arp entries associated with the specified interface
1283  */
1284 static clib_error_t *
1285 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1286                                     u32 sw_if_index, u32 flags)
1287 {
1288   ip_address_family_t af;
1289
1290   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1291                    format_vnet_sw_if_index_name, vnet_get_main (),
1292                    sw_if_index,
1293                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1294
1295   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1296     {
1297       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_populate (af, sw_if_index);
1298     }
1299   else
1300     {
1301       /* admin down, flush all neighbours */
1302       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1303     }
1304
1305   return (NULL);
1306 }
1307
1308 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1309
1310 /*
1311  * Remove any arp entries associated with the specified interface
1312  */
1313 static clib_error_t *
1314 ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
1315                                  u32 sw_if_index, u32 is_add)
1316 {
1317   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1318                    format_vnet_sw_if_index_name, vnet_get_main (),
1319                    sw_if_index, (is_add ? "add" : "del"));
1320
1321   if (!is_add && sw_if_index != ~0)
1322     {
1323       ip_address_family_t af;
1324
1325       FOR_EACH_IP_ADDRESS_FAMILY (af) ip_neighbor_flush (af, sw_if_index);
1326     }
1327
1328   return (NULL);
1329 }
1330
1331 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
1332
/**
 * Context for ip_neighbor_walk_covered(): describes a prefix and
 * collects the neighbors that fall within it.
 */
typedef struct ip_neighbor_walk_covered_ctx_t_
{
  /** address part of the prefix; its version selects the v4/v6 match */
  ip_address_t addr;
  /** prefix length */
  u32 length;
  /** vector of indices of the dynamic neighbors covered by the prefix */
  index_t *ipnis;
} ip_neighbor_walk_covered_ctx_t;
1339
1340 static walk_rc_t
1341 ip_neighbor_walk_covered (index_t ipni, void *arg)
1342 {
1343   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1344   ip_neighbor_t *ipn;
1345
1346   ipn = ip_neighbor_get (ipni);
1347
1348   if (AF_IP4 == ip_addr_version (&ctx->addr))
1349     {
1350       if (ip4_destination_matches_route (&ip4_main,
1351                                          &ip_addr_v4 (&ipn->ipn_key->ipnk_ip),
1352                                          &ip_addr_v4 (&ctx->addr),
1353                                          ctx->length) &&
1354           ip_neighbor_is_dynamic (ipn))
1355         {
1356           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1357         }
1358     }
1359   else if (AF_IP6 == ip_addr_version (&ctx->addr))
1360     {
1361       if (ip6_destination_matches_route (&ip6_main,
1362                                          &ip_addr_v6 (&ipn->ipn_key->ipnk_ip),
1363                                          &ip_addr_v6 (&ctx->addr),
1364                                          ctx->length) &&
1365           ip_neighbor_is_dynamic (ipn))
1366         {
1367           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1368         }
1369     }
1370   return (WALK_CONTINUE);
1371 }
1372
1373
1374 /*
1375  * callback when an interface address is added or deleted
1376  */
1377 static void
1378 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1379                                           uword opaque,
1380                                           u32 sw_if_index,
1381                                           ip4_address_t * address,
1382                                           u32 address_length,
1383                                           u32 if_address_index, u32 is_del)
1384 {
1385   /*
1386    * Flush the ARP cache of all entries covered by the address
1387    * that is being removed.
1388    */
1389   IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
1390                    (is_del ? "del" : "add"),
1391                    format_vnet_sw_if_index_name, vnet_get_main (),
1392                    sw_if_index, format_ip4_address, address, address_length);
1393
1394   if (is_del)
1395     {
1396       /* *INDENT-OFF* */
1397       ip_neighbor_walk_covered_ctx_t ctx = {
1398         .addr = {
1399           .ip.ip4 = *address,
1400           .version = AF_IP4,
1401         },
1402         .length = address_length,
1403       };
1404       /* *INDENT-ON* */
1405       index_t *ipni;
1406
1407       ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_covered, &ctx);
1408
1409       vec_foreach (ipni, ctx.ipnis)
1410         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1411
1412       vec_free (ctx.ipnis);
1413     }
1414 }
1415
1416 /*
1417  * callback when an interface address is added or deleted
1418  */
1419 static void
1420 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1421                                           uword opaque,
1422                                           u32 sw_if_index,
1423                                           ip6_address_t * address,
1424                                           u32 address_length,
1425                                           u32 if_address_index, u32 is_del)
1426 {
1427   /*
1428    * Flush the ARP cache of all entries covered by the address
1429    * that is being removed.
1430    */
1431   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1432                    format_vnet_sw_if_index_name, vnet_get_main (),
1433                    sw_if_index, format_ip6_address, address, address_length,
1434                    (is_del ? "del" : "add"));
1435
1436   if (is_del)
1437     {
1438       /* *INDENT-OFF* */
1439       ip_neighbor_walk_covered_ctx_t ctx = {
1440         .addr = {
1441           .ip.ip6 = *address,
1442           .version = AF_IP6,
1443         },
1444         .length = address_length,
1445       };
1446       /* *INDENT-ON* */
1447       index_t *ipni;
1448
1449       ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_covered, &ctx);
1450
1451       vec_foreach (ipni, ctx.ipnis)
1452         ip_neighbor_destroy (ip_neighbor_get (*ipni));
1453
1454       vec_free (ctx.ipnis);
1455     }
1456 }
1457
/**
 * Context for ip_neighbor_walk_table_bind(): the FIB indices involved
 * when an interface is moved between tables.
 */
typedef struct ip_neighbor_table_bind_ctx_t_
{
  /** FIB index the neighbor's adj-fib entry is added to */
  u32 new_fib_index;
  /** FIB index the neighbor's adj-fib entry is removed from */
  u32 old_fib_index;
} ip_neighbor_table_bind_ctx_t;
1463
1464 static walk_rc_t
1465 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1466 {
1467   ip_neighbor_table_bind_ctx_t *ctx = arg;
1468   ip_neighbor_t *ipn;
1469
1470   ipn = ip_neighbor_get (ipni);
1471   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1472   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1473
1474   return (WALK_CONTINUE);
1475 }
1476
1477 static void
1478 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1479                            uword opaque,
1480                            u32 sw_if_index,
1481                            u32 new_fib_index, u32 old_fib_index)
1482 {
1483   ip_neighbor_table_bind_ctx_t ctx = {
1484     .old_fib_index = old_fib_index,
1485     .new_fib_index = new_fib_index,
1486   };
1487
1488   ip_neighbor_walk (AF_IP4, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1489 }
1490
1491 static void
1492 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1493                            uword opaque,
1494                            u32 sw_if_index,
1495                            u32 new_fib_index, u32 old_fib_index)
1496 {
1497   ip_neighbor_table_bind_ctx_t ctx = {
1498     .old_fib_index = old_fib_index,
1499     .new_fib_index = new_fib_index,
1500   };
1501
1502   ip_neighbor_walk (AF_IP6, sw_if_index, ip_neighbor_walk_table_bind, &ctx);
1503 }
1504
/**
 * Result of checking one neighbor's age, see ip_neighbour_age_out().
 */
typedef enum ip_neighbor_age_state_t_
{
  /** the neighbor has not exceeded the configured age */
  IP_NEIGHBOR_AGE_ALIVE,
  /** the neighbor exceeded the age and a probe was sent for it */
  IP_NEIGHBOR_AGE_PROBE,
  /** the neighbor exceeded the age and has used up its probes */
  IP_NEIGHBOR_AGE_DEAD,
} ip_neighbor_age_state_t;

/** Timeout value for which the age process waits for an event only,
 * with no clock timeout */
#define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1513
1514 static ip_neighbor_age_state_t
1515 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1516 {
1517   ip_address_family_t af;
1518   ip_neighbor_t *ipn;
1519   u32 ipndb_age;
1520   u32 ttl;
1521
1522   ipn = ip_neighbor_get (ipni);
1523   af = ip_neighbor_get_af (ipn);
1524   ipndb_age = ip_neighbor_db[af].ipndb_age;
1525   ttl = now - ipn->ipn_time_last_updated;
1526   *wait = ipndb_age;
1527
1528   if (ttl > ipndb_age)
1529     {
1530       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
1531                        format_ip_neighbor, ipni, now,
1532                        ipn->ipn_time_last_updated, ipndb_age);
1533       if (ipn->ipn_n_probes > 2)
1534         {
1535           /* 3 strikes and yea-re out */
1536           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
1537           *wait = 1;
1538           return (IP_NEIGHBOR_AGE_DEAD);
1539         }
1540       else
1541         {
1542           ip_neighbor_probe_dst (ip_neighbor_get_sw_if_index (ipn),
1543                                  af, &ip_addr_46 (&ipn->ipn_key->ipnk_ip));
1544
1545           ipn->ipn_n_probes++;
1546           *wait = 1;
1547         }
1548     }
1549   else
1550     {
1551       /* here we are sure that ttl <= ipndb_age */
1552       *wait = ipndb_age - ttl + 1;
1553       return (IP_NEIGHBOR_AGE_ALIVE);
1554     }
1555
1556   return (IP_NEIGHBOR_AGE_PROBE);
1557 }
1558
/**
 * Events signalled to the neighbor age processes.
 */
typedef enum ip_neighbor_process_event_t_
{
  /** signalled by ip_neighbor_config() after the configuration is written */
  IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
} ip_neighbor_process_event_t;
1563
/**
 * Body of the neighbor aging process for one address family.
 *
 * Loops forever: waits for an event, or for an event or the timeout
 * when the timeout is non-zero, then either ages the time-sorted
 * neighbor list (timer expiry) or re-evaluates how long to sleep
 * (wakeup event).
 *
 * @param vm  vlib main
 * @param rt  node runtime (not used here)
 * @param f   frame (not used here)
 * @param af  address family whose neighbors this loop ages
 * @return 0; the statement is not reached since the loop has no exit
 */
static uword
ip_neighbor_age_loop (vlib_main_t * vm,
                      vlib_node_runtime_t * rt,
                      vlib_frame_t * f, ip_address_family_t af)
{
  uword event_type, *event_data = NULL;
  f64 timeout;

  /* Start with a zero timeout, which makes the loop below wait for an
   * event only, i.e. sleep until explicitly woken */
  timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;

  while (1)
    {
      f64 now;

      /* a zero timeout means: no clock, block until an event arrives */
      if (!timeout)
        vlib_process_wait_for_event (vm);
      else
        vlib_process_wait_for_event_or_clock (vm, timeout);

      /* only the event type matters; the event data is discarded */
      event_type = vlib_process_get_events (vm, &event_data);
      vec_reset_length (event_data);

      now = vlib_time_now (vm);

      switch (event_type)
        {
        case ~0:
          {
            /* timer expired */
            ip_neighbor_elt_t *elt, *head;
            f64 wait;

            /* default to sleeping a full age period; shortened below by
             * the wait time each visited neighbor asks for */
            timeout = ip_neighbor_db[af].ipndb_age;
            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[af]);

          /* *INDENT-OFF*/
          /* the list is time sorted, newest first, so start from the back
           * and work forwards. Stop when we get to one that is alive */
          restart:
          clib_llist_foreach_reverse(ip_neighbor_elt_pool,
                                     ipne_anchor, head, elt,
          ({
            ip_neighbor_age_state_t res;

            res = ip_neighbour_age_out(elt->ipne_index, now, &wait);

            if (IP_NEIGHBOR_AGE_ALIVE == res) {
              /* the oldest neighbor has not yet expired, go back to sleep */
              timeout = clib_min (wait, timeout);
              break;
            }
            else if (IP_NEIGHBOR_AGE_DEAD == res) {
              /* the oldest neighbor is dead, pop it, then restart the walk
               * again from the back */
              ip_neighbor_destroy (ip_neighbor_get(elt->ipne_index));
              goto restart;
            }

            /* a probe was sent for this neighbor; keep walking towards
             * the newer entries */
            timeout = clib_min (wait, timeout);
          }));
          /* *INDENT-ON* */
            break;
          }
        case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
          {

            if (!ip_neighbor_db[af].ipndb_age)
              {
                /* aging has been disabled */
                timeout = 0;
                break;
              }
            ip_neighbor_elt_t *elt, *head;

            head = pool_elt_at_index (ip_neighbor_elt_pool,
                                      ip_neighbor_list_head[af]);
            /* no neighbors yet */
            if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
              {
                timeout = ip_neighbor_db[af].ipndb_age;
                break;
              }

            /* poke the oldest neighbour for aging, which returns how long we sleep for */
            elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
            ip_neighbour_age_out (elt->ipne_index, now, &timeout);
            break;
          }
        }
    }
  return 0;
}
1658
1659 static uword
1660 ip4_neighbor_age_process (vlib_main_t * vm,
1661                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1662 {
1663   return (ip_neighbor_age_loop (vm, rt, f, AF_IP4));
1664 }
1665
1666 static uword
1667 ip6_neighbor_age_process (vlib_main_t * vm,
1668                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1669 {
1670   return (ip_neighbor_age_loop (vm, rt, f, AF_IP6));
1671 }
1672
/* Process nodes running the neighbor age loop, one per address family.
 * ip_neighbor_config() signals the matching node by its index. */
/* *INDENT-OFF* */
VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
  .function = ip4_neighbor_age_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ip4-neighbor-age-process",
};
VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
  .function = ip6_neighbor_age_process,
  .type = VLIB_NODE_TYPE_PROCESS,
  .name = "ip6-neighbor-age-process",
};
/* *INDENT-ON* */
1685
1686 int
1687 ip_neighbor_config (ip_address_family_t af, u32 limit, u32 age, bool recycle)
1688 {
1689   ip_neighbor_db[af].ipndb_limit = limit;
1690   ip_neighbor_db[af].ipndb_recycle = recycle;
1691   ip_neighbor_db[af].ipndb_age = age;
1692
1693   vlib_process_signal_event (vlib_get_main (),
1694                              (AF_IP4 == af ?
1695                               ip4_neighbor_age_process_node.index :
1696                               ip6_neighbor_age_process_node.index),
1697                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1698
1699   return (0);
1700 }
1701
1702 static clib_error_t *
1703 ip_neighbor_config_show (vlib_main_t * vm,
1704                          unformat_input_t * input, vlib_cli_command_t * cmd)
1705 {
1706   ip_address_family_t af;
1707
1708   /* *INDENT-OFF* */
1709   FOR_EACH_IP_ADDRESS_FAMILY(af) {
1710     vlib_cli_output (vm, "%U:", format_ip_address_family, af);
1711     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1712                      ip_neighbor_db[af].ipndb_limit,
1713                      ip_neighbor_db[af].ipndb_age,
1714                      ip_neighbor_db[af].ipndb_recycle);
1715   }
1716
1717   /* *INDENT-ON* */
1718   return (NULL);
1719 }
1720
1721 static clib_error_t *
1722 ip_neighbor_config_set (vlib_main_t *vm, unformat_input_t *input,
1723                         vlib_cli_command_t *cmd)
1724 {
1725   unformat_input_t _line_input, *line_input = &_line_input;
1726   clib_error_t *error = NULL;
1727   ip_address_family_t af;
1728   u32 limit, age;
1729   bool recycle;
1730
1731   if (!unformat_user (input, unformat_line_input, line_input))
1732     return 0;
1733
1734   if (!unformat (line_input, "%U", unformat_ip_address_family, &af))
1735     {
1736       error = unformat_parse_error (line_input);
1737       goto done;
1738     }
1739
1740   limit = ip_neighbor_db[af].ipndb_limit;
1741   age = ip_neighbor_db[af].ipndb_age;
1742   recycle = ip_neighbor_db[af].ipndb_recycle;
1743
1744   while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT)
1745     {
1746       if (unformat (line_input, "limit %u", &limit))
1747         ;
1748       else if (unformat (line_input, "age %u", &age))
1749         ;
1750       else if (unformat (line_input, "recycle"))
1751         recycle = true;
1752       else if (unformat (line_input, "norecycle"))
1753         recycle = false;
1754       else
1755         {
1756           error = unformat_parse_error (line_input);
1757           goto done;
1758         }
1759     }
1760
1761   ip_neighbor_config (af, limit, age, recycle);
1762
1763 done:
1764   unformat_free (line_input);
1765   return error;
1766 }
1767
/* CLI commands to display and to change the per address-family
 * neighbor configuration (limit, age, recycle) */
/* *INDENT-OFF* */
VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
  .path = "show ip neighbor-config",
  .function = ip_neighbor_config_show,
  .short_help = "show ip neighbor-config",
};
VLIB_CLI_COMMAND (set_ip_neighbor_cfg_cmd_node, static) = {
  .path = "set ip neighbor-config",
  .function = ip_neighbor_config_set,
  .short_help = "set ip neighbor-config ip4|ip6 [limit <limit>] [age <age>] "
                "[recycle|norecycle]",
};
/* *INDENT-ON* */
1781
1782 static clib_error_t *
1783 ip_neighbor_init (vlib_main_t * vm)
1784 {
1785   {
1786     ip4_add_del_interface_address_callback_t cb = {
1787       .function = ip_neighbor_add_del_interface_address_v4,
1788     };
1789     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1790   }
1791   {
1792     ip6_add_del_interface_address_callback_t cb = {
1793       .function = ip_neighbor_add_del_interface_address_v6,
1794     };
1795     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1796   }
1797   {
1798     ip4_table_bind_callback_t cb = {
1799       .function = ip_neighbor_table_bind_v4,
1800     };
1801     vec_add1 (ip4_main.table_bind_callbacks, cb);
1802   }
1803   {
1804     ip6_table_bind_callback_t cb = {
1805       .function = ip_neighbor_table_bind_v6,
1806     };
1807     vec_add1 (ip6_main.table_bind_callbacks, cb);
1808   }
1809   {
1810     ethernet_address_change_ctx_t ctx = {
1811       .function = ip_neighbor_ethernet_change_mac,
1812       .function_opaque = 0,
1813     };
1814     vec_add1 (ethernet_main.address_change_callbacks, ctx);
1815   }
1816
1817   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1818
1819   ip_address_family_t af;
1820
1821   FOR_EACH_IP_ADDRESS_FAMILY (af)
1822     ip_neighbor_list_head[af] =
1823     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1824
1825   return (NULL);
1826 }
1827
/* Register ip_neighbor_init as an init function, ordered to run after
 * "ip_main_init" */
/* *INDENT-OFF* */
VLIB_INIT_FUNCTION (ip_neighbor_init) =
{
  .runs_after = VLIB_INITS("ip_main_init"),
};
/* *INDENT-ON* */
1834
1835 /*
1836  * fd.io coding-style-patch-verification: ON
1837  *
1838  * Local Variables:
1839  * eval: (c-set-style "gnu")
1840  * End:
1841  */