ip-neighbor: Fix aging timeout
[vpp.git] / src / vnet / ip-neighbor / ip_neighbor.c
1 /*
2  * src/vnet/ip-neighbor/ip_neighbor.c: ip neighbor generic handling
3  *
4  * Copyright (c) 2018 Cisco and/or its affiliates.
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  *     http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17
18 #include <vppinfra/llist.h>
19
20 #include <vnet/ip-neighbor/ip_neighbor.h>
21 #include <vnet/ip-neighbor/ip4_neighbor.h>
22 #include <vnet/ip-neighbor/ip6_neighbor.h>
23 #include <vnet/ip-neighbor/ip_neighbor_watch.h>
24
25 #include <vnet/ip/ip6_ll_table.h>
26 #include <vnet/fib/fib_table.h>
27 #include <vnet/adj/adj_mcast.h>
28
29 /** Pool for All IP neighbors */
30 static ip_neighbor_t *ip_neighbor_pool;
31
32 /** protocol specific lists of time sorted neighbors */
33 index_t ip_neighbor_list_head[IP46_N_TYPES];
34
35 typedef struct ip_neighbor_elt_t_
36 {
37   clib_llist_anchor_t ipne_anchor;
38   index_t ipne_index;
39 } ip_neighbor_elt_t;
40
41 /** Pool of linked list elemeents */
42 ip_neighbor_elt_t *ip_neighbor_elt_pool;
43
44 typedef struct ip_neighbor_db_t_
45 {
46   /** per interface hash */
47   uword **ipndb_hash;
48   /** per-protocol limit - max number of neighbors*/
49   u32 ipndb_limit;
50   /** max age of a neighbor before it's forcibly evicted */
51   u32 ipndb_age;
52   /** when the limit is reached and new neighbors are created, should
53    * we recycle an old one */
54   bool ipndb_recycle;
55   /** per-protocol number of elements */
56   u32 ipndb_n_elts;
57   /** per-protocol number of elements per-fib-index*/
58   u32 *ipndb_n_elts_per_fib;
59 } ip_neighbor_db_t;
60
61 static vlib_log_class_t ipn_logger;
62
63 /* DBs of neighbours one per AF */
64 /* *INDENT-OFF* */
65 static ip_neighbor_db_t ip_neighbor_db[IP46_N_TYPES] = {
66   [IP46_TYPE_IP4] = {
67     .ipndb_limit = 50000,
68     /* Default to not aging and not recycling */
69     .ipndb_age = 0,
70     .ipndb_recycle = false,
71   },
72   [IP46_TYPE_IP6] = {
73     .ipndb_limit = 50000,
74     /* Default to not aging and not recycling */
75     .ipndb_age = 0,
76     .ipndb_recycle = false,
77   }
78 };
79 /* *INDENT-ON* */
80
/* Logging shorthands bound to this file's log class.
 * Note that the expansion itself ends in a ';'. */
#define IP_NEIGHBOR_DBG(...) vlib_log_debug (ipn_logger, __VA_ARGS__);

#define IP_NEIGHBOR_INFO(...) vlib_log_notice (ipn_logger, __VA_ARGS__);
86
87 ip_neighbor_t *
88 ip_neighbor_get (index_t ipni)
89 {
90   if (pool_is_free_index (ip_neighbor_pool, ipni))
91     return (NULL);
92
93   return (pool_elt_at_index (ip_neighbor_pool, ipni));
94 }
95
96 static index_t
97 ip_neighbor_get_index (const ip_neighbor_t * ipn)
98 {
99   return (ipn - ip_neighbor_pool);
100 }
101
102 static bool
103 ip_neighbor_is_dynamic (const ip_neighbor_t * ipn)
104 {
105   return (ipn->ipn_flags & IP_NEIGHBOR_FLAG_DYNAMIC);
106 }
107
108 const ip46_address_t *
109 ip_neighbor_get_ip (const ip_neighbor_t * ipn)
110 {
111   return (&ipn->ipn_key->ipnk_ip);
112 }
113
114 const mac_address_t *
115 ip_neighbor_get_mac (const ip_neighbor_t * ipn)
116 {
117   return (&ipn->ipn_mac);
118 }
119
120 const u32
121 ip_neighbor_get_sw_if_index (const ip_neighbor_t * ipn)
122 {
123   return (ipn->ipn_key->ipnk_sw_if_index);
124 }
125
126 static void
127 ip_neighbor_list_remove (ip_neighbor_t * ipn)
128 {
129   /* new neighbours, are added to the head of the list, since the
130    * list is time sorted, newest first */
131   ip_neighbor_elt_t *elt;
132
133   if (~0 != ipn->ipn_elt)
134     {
135       elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
136
137       clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
138     }
139 }
140
141 static void
142 ip_neighbor_refresh (ip_neighbor_t * ipn)
143 {
144   /* new neighbours, are added to the head of the list, since the
145    * list is time sorted, newest first */
146   ip_neighbor_elt_t *elt, *head;
147
148   ipn->ipn_time_last_updated = vlib_time_now (vlib_get_main ());
149   ipn->ipn_n_probes = 0;
150
151   if (ip_neighbor_is_dynamic (ipn))
152     {
153       if (~0 == ipn->ipn_elt)
154         /* first time insertion */
155         pool_get_zero (ip_neighbor_elt_pool, elt);
156       else
157         {
158           /* already inserted - extract first */
159           elt = pool_elt_at_index (ip_neighbor_elt_pool, ipn->ipn_elt);
160
161           clib_llist_remove (ip_neighbor_elt_pool, ipne_anchor, elt);
162         }
163       head = pool_elt_at_index (ip_neighbor_elt_pool,
164                                 ip_neighbor_list_head[ipn->
165                                                       ipn_key->ipnk_type]);
166
167       elt->ipne_index = ip_neighbor_get_index (ipn);
168       clib_llist_add (ip_neighbor_elt_pool, ipne_anchor, elt, head);
169       ipn->ipn_elt = elt - ip_neighbor_elt_pool;
170     }
171 }
172
173 static void
174 ip_neighbor_db_add (const ip_neighbor_t * ipn)
175 {
176   vec_validate (ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash,
177                 ipn->ipn_key->ipnk_sw_if_index);
178
179   if (!ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash
180       [ipn->ipn_key->ipnk_sw_if_index])
181     ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash[ipn->
182                                                        ipn_key->ipnk_sw_if_index]
183       = hash_create_mem (0, sizeof (ip_neighbor_key_t), sizeof (index_t));
184
185   hash_set_mem (ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_hash
186                 [ipn->ipn_key->ipnk_sw_if_index], ipn->ipn_key,
187                 ip_neighbor_get_index (ipn));
188
189   ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_n_elts++;
190 }
191
192 static void
193 ip_neighbor_db_remove (const ip_neighbor_key_t * key)
194 {
195   vec_validate (ip_neighbor_db[key->ipnk_type].ipndb_hash,
196                 key->ipnk_sw_if_index);
197
198   hash_unset_mem (ip_neighbor_db[key->ipnk_type].ipndb_hash
199                   [key->ipnk_sw_if_index], key);
200
201   ip_neighbor_db[key->ipnk_type].ipndb_n_elts--;
202 }
203
204 static ip_neighbor_t *
205 ip_neighbor_db_find (const ip_neighbor_key_t * key)
206 {
207   uword *p;
208
209   if (key->ipnk_sw_if_index >=
210       vec_len (ip_neighbor_db[key->ipnk_type].ipndb_hash))
211     return NULL;
212
213   p =
214     hash_get_mem (ip_neighbor_db[key->ipnk_type].ipndb_hash
215                   [key->ipnk_sw_if_index], key);
216
217   if (p)
218     return ip_neighbor_get (p[0]);
219
220   return (NULL);
221 }
222
223 static u8
224 ip46_type_pfx_len (ip46_type_t type)
225 {
226   return (type == IP46_TYPE_IP4 ? 32 : 128);
227 }
228
229 static void
230 ip_neighbor_adj_fib_add (ip_neighbor_t * ipn, u32 fib_index)
231 {
232   if (ipn->ipn_key->ipnk_type == IP46_TYPE_IP6 &&
233       ip6_address_is_link_local_unicast (&ipn->ipn_key->ipnk_ip.ip6))
234     {
235       ip6_ll_prefix_t pfx = {
236         .ilp_addr = ipn->ipn_key->ipnk_ip.ip6,
237         .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
238       };
239       ipn->ipn_fib_entry_index =
240         ip6_ll_table_entry_update (&pfx, FIB_ROUTE_PATH_FLAG_NONE);
241     }
242   else
243     {
244       fib_protocol_t fproto;
245
246       fproto = fib_proto_from_ip46 (ipn->ipn_key->ipnk_type);
247
248       fib_prefix_t pfx = {
249         .fp_len = ip46_type_pfx_len (ipn->ipn_key->ipnk_type),
250         .fp_proto = fproto,
251         .fp_addr = ipn->ipn_key->ipnk_ip,
252       };
253
254       ipn->ipn_fib_entry_index =
255         fib_table_entry_path_add (fib_index, &pfx, FIB_SOURCE_ADJ,
256                                   FIB_ENTRY_FLAG_ATTACHED,
257                                   fib_proto_to_dpo (fproto),
258                                   &pfx.fp_addr,
259                                   ipn->ipn_key->ipnk_sw_if_index,
260                                   ~0, 1, NULL, FIB_ROUTE_PATH_FLAG_NONE);
261
262       vec_validate (ip_neighbor_db
263                     [ipn->ipn_key->ipnk_type].ipndb_n_elts_per_fib,
264                     fib_index);
265
266       ip_neighbor_db[ipn->ipn_key->
267                      ipnk_type].ipndb_n_elts_per_fib[fib_index]++;
268
269       if (1 ==
270           ip_neighbor_db[ipn->ipn_key->
271                          ipnk_type].ipndb_n_elts_per_fib[fib_index])
272         fib_table_lock (fib_index, fproto, FIB_SOURCE_ADJ);
273     }
274 }
275
276 static void
277 ip_neighbor_adj_fib_remove (ip_neighbor_t * ipn, u32 fib_index)
278 {
279   if (FIB_NODE_INDEX_INVALID != ipn->ipn_fib_entry_index)
280     {
281       if (ipn->ipn_key->ipnk_type == IP46_TYPE_IP6 &&
282           ip6_address_is_link_local_unicast (&ipn->ipn_key->ipnk_ip.ip6))
283         {
284           ip6_ll_prefix_t pfx = {
285             .ilp_addr = ipn->ipn_key->ipnk_ip.ip6,
286             .ilp_sw_if_index = ipn->ipn_key->ipnk_sw_if_index,
287           };
288           ip6_ll_table_entry_delete (&pfx);
289         }
290       else
291         {
292           fib_protocol_t fproto;
293
294           fproto = fib_proto_from_ip46 (ipn->ipn_key->ipnk_type);
295
296           fib_prefix_t pfx = {
297             .fp_len = ip46_type_pfx_len (ipn->ipn_key->ipnk_type),
298             .fp_proto = fproto,
299             .fp_addr = ipn->ipn_key->ipnk_ip,
300           };
301
302           fib_table_entry_path_remove (fib_index,
303                                        &pfx,
304                                        FIB_SOURCE_ADJ,
305                                        fib_proto_to_dpo (fproto),
306                                        &pfx.fp_addr,
307                                        ipn->ipn_key->ipnk_sw_if_index,
308                                        ~0, 1, FIB_ROUTE_PATH_FLAG_NONE);
309
310           ip_neighbor_db[ipn->ipn_key->
311                          ipnk_type].ipndb_n_elts_per_fib[fib_index]--;
312
313           if (0 ==
314               ip_neighbor_db[ipn->ipn_key->
315                              ipnk_type].ipndb_n_elts_per_fib[fib_index])
316             fib_table_unlock (fib_index, fproto, FIB_SOURCE_ADJ);
317         }
318     }
319 }
320
321 static void
322 ip_neighbor_mk_complete (adj_index_t ai, ip_neighbor_t * ipn)
323 {
324   adj_nbr_update_rewrite (ai, ADJ_NBR_REWRITE_FLAG_COMPLETE,
325                           ethernet_build_rewrite (vnet_get_main (),
326                                                   ipn->
327                                                   ipn_key->ipnk_sw_if_index,
328                                                   adj_get_link_type (ai),
329                                                   ipn->ipn_mac.bytes));
330 }
331
332 static void
333 ip_neighbor_mk_incomplete (adj_index_t ai)
334 {
335   ip_adjacency_t *adj = adj_get (ai);
336
337   adj_nbr_update_rewrite (ai,
338                           ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
339                           ethernet_build_rewrite (vnet_get_main (),
340                                                   adj->
341                                                   rewrite_header.sw_if_index,
342                                                   VNET_LINK_ARP,
343                                                   VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
344 }
345
346 static adj_walk_rc_t
347 ip_neighbor_mk_complete_walk (adj_index_t ai, void *ctx)
348 {
349   ip_neighbor_t *ipn = ctx;
350
351   ip_neighbor_mk_complete (ai, ipn);
352
353   return (ADJ_WALK_RC_CONTINUE);
354 }
355
356 static adj_walk_rc_t
357 ip_neighbor_mk_incomplete_walk (adj_index_t ai, void *ctx)
358 {
359   ip_neighbor_mk_incomplete (ai);
360
361   return (ADJ_WALK_RC_CONTINUE);
362 }
363
364 static void
365 ip_neighbor_free (ip_neighbor_t * ipn)
366 {
367   IP_NEIGHBOR_DBG ("free: %U", format_ip_neighbor,
368                    ip_neighbor_get_index (ipn));
369
370   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
371                    fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
372                    &ipn->ipn_key->ipnk_ip,
373                    ip_neighbor_mk_incomplete_walk, ipn);
374   ip_neighbor_adj_fib_remove
375     (ipn,
376      fib_table_get_index_for_sw_if_index
377      (fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
378       ipn->ipn_key->ipnk_sw_if_index));
379
380   ip_neighbor_list_remove (ipn);
381   ip_neighbor_db_remove (ipn->ipn_key);
382   clib_mem_free (ipn->ipn_key);
383
384   pool_put (ip_neighbor_pool, ipn);
385 }
386
387 static bool
388 ip_neighbor_force_reuse (ip46_type_t type)
389 {
390   if (!ip_neighbor_db[type].ipndb_recycle)
391     return false;
392
393   /* pluck the oldest entry, which is the one from the end of the list */
394   ip_neighbor_elt_t *elt, *head;
395
396   head =
397     pool_elt_at_index (ip_neighbor_elt_pool, ip_neighbor_list_head[type]);
398
399   if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
400     return (false);
401
402   elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
403   ip_neighbor_free (ip_neighbor_get (elt->ipne_index));
404
405   return (true);
406 }
407
408 static ip_neighbor_t *
409 ip_neighbor_alloc (const ip_neighbor_key_t * key,
410                    const mac_address_t * mac, ip_neighbor_flags_t flags)
411 {
412   ip_neighbor_t *ipn;
413
414   if (ip_neighbor_db[key->ipnk_type].ipndb_limit &&
415       (ip_neighbor_db[key->ipnk_type].ipndb_n_elts >=
416        ip_neighbor_db[key->ipnk_type].ipndb_limit))
417     {
418       if (!ip_neighbor_force_reuse (key->ipnk_type))
419         return (NULL);
420     }
421
422   pool_get_zero (ip_neighbor_pool, ipn);
423
424   ipn->ipn_key = clib_mem_alloc (sizeof (*ipn->ipn_key));
425   clib_memcpy (ipn->ipn_key, key, sizeof (*ipn->ipn_key));
426
427   ipn->ipn_fib_entry_index = FIB_NODE_INDEX_INVALID;
428   ipn->ipn_flags = flags;
429   ipn->ipn_elt = ~0;
430
431   mac_address_copy (&ipn->ipn_mac, mac);
432
433   ip_neighbor_db_add (ipn);
434
435   /* create the adj-fib. the entry in the FIB table for the peer's interface */
436   if (!(ipn->ipn_flags & IP_NEIGHBOR_FLAG_NO_FIB_ENTRY))
437     ip_neighbor_adj_fib_add
438       (ipn, fib_table_get_index_for_sw_if_index
439        (fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
440         ipn->ipn_key->ipnk_sw_if_index));
441
442   return (ipn);
443 }
444
445 int
446 ip_neighbor_add (const ip46_address_t * ip,
447                  ip46_type_t type,
448                  const mac_address_t * mac,
449                  u32 sw_if_index,
450                  ip_neighbor_flags_t flags, u32 * stats_index)
451 {
452   fib_protocol_t fproto;
453   ip_neighbor_t *ipn;
454
455   /* main thread only */
456   ASSERT (0 == vlib_get_thread_index ());
457
458   fproto = fib_proto_from_ip46 (type);
459
460   const ip_neighbor_key_t key = {
461     .ipnk_ip = *ip,
462     .ipnk_sw_if_index = sw_if_index,
463     .ipnk_type = type,
464   };
465
466   ipn = ip_neighbor_db_find (&key);
467
468   if (ipn)
469     {
470       IP_NEIGHBOR_DBG ("update: %U, %U",
471                        format_vnet_sw_if_index_name, vnet_get_main (),
472                        sw_if_index, format_ip46_address, ip, type,
473                        format_ip_neighbor_flags, flags, format_mac_address_t,
474                        mac);
475
476       /* Refuse to over-write static neighbor entry. */
477       if (!(flags & IP_NEIGHBOR_FLAG_STATIC) &&
478           (ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
479         {
480           /* if MAC address match, still check to send event */
481           if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
482             goto check_customers;
483           return -2;
484         }
485
486       /*
487        * prevent a DoS attack from the data-plane that
488        * spams us with no-op updates to the MAC address
489        */
490       if (0 == mac_address_cmp (&ipn->ipn_mac, mac))
491         {
492           ip_neighbor_refresh (ipn);
493           goto check_customers;
494         }
495
496       mac_address_copy (&ipn->ipn_mac, mac);
497
498       /* A dynamic entry can become static, but not vice-versa.
499        * i.e. since if it was programmed by the CP then it must
500        * be removed by the CP */
501       if ((flags & IP_NEIGHBOR_FLAG_STATIC) &&
502           !(ipn->ipn_flags & IP_NEIGHBOR_FLAG_STATIC))
503         {
504           ip_neighbor_list_remove (ipn);
505           ipn->ipn_flags |= IP_NEIGHBOR_FLAG_STATIC;
506           ipn->ipn_flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
507         }
508     }
509   else
510     {
511       IP_NEIGHBOR_INFO ("add: %U, %U",
512                         format_vnet_sw_if_index_name, vnet_get_main (),
513                         sw_if_index, format_ip46_address, ip, type,
514                         format_ip_neighbor_flags, flags, format_mac_address_t,
515                         mac);
516
517       ipn = ip_neighbor_alloc (&key, mac, flags);
518
519       if (NULL == ipn)
520         return VNET_API_ERROR_LIMIT_EXCEEDED;
521     }
522
523   /* Update time stamp and flags. */
524   ip_neighbor_refresh (ipn);
525
526   adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
527                    fproto, &ipn->ipn_key->ipnk_ip,
528                    ip_neighbor_mk_complete_walk, ipn);
529
530 check_customers:
531   /* Customer(s) requesting event for this address? */
532   ip_neighbor_publish (ip_neighbor_get_index (ipn));
533
534   if (stats_index)
535     *stats_index = adj_nbr_find (fproto,
536                                  fib_proto_to_link (fproto),
537                                  &ipn->ipn_key->ipnk_ip,
538                                  ipn->ipn_key->ipnk_sw_if_index);
539   return 0;
540 }
541
542 int
543 ip_neighbor_del (const ip46_address_t * ip, ip46_type_t type, u32 sw_if_index)
544 {
545   ip_neighbor_t *ipn;
546
547   /* main thread only */
548   ASSERT (0 == vlib_get_thread_index ());
549
550   IP_NEIGHBOR_INFO ("delete: %U, %U",
551                     format_vnet_sw_if_index_name, vnet_get_main (),
552                     sw_if_index, format_ip46_address, ip, type);
553
554   const ip_neighbor_key_t key = {
555     .ipnk_ip = *ip,
556     .ipnk_sw_if_index = sw_if_index,
557     .ipnk_type = type,
558   };
559
560   ipn = ip_neighbor_db_find (&key);
561
562   if (NULL == ipn)
563     return (VNET_API_ERROR_NO_SUCH_ENTRY);
564
565   ip_neighbor_free (ipn);
566
567   return (0);
568 }
569
570 void
571 ip_neighbor_update (vnet_main_t * vnm, adj_index_t ai)
572 {
573   ip_neighbor_t *ipn;
574   ip_adjacency_t *adj;
575
576   adj = adj_get (ai);
577
578   ip_neighbor_key_t key = {
579     .ipnk_ip = adj->sub_type.nbr.next_hop,
580     .ipnk_type = fib_proto_to_ip46 (adj->ia_nh_proto),
581     .ipnk_sw_if_index = adj->rewrite_header.sw_if_index,
582   };
583   ipn = ip_neighbor_db_find (&key);
584
585   switch (adj->lookup_next_index)
586     {
587     case IP_LOOKUP_NEXT_ARP:
588       if (NULL != ipn)
589         {
590           adj_nbr_walk_nh (adj->rewrite_header.sw_if_index,
591                            adj->ia_nh_proto,
592                            &ipn->ipn_key->ipnk_ip,
593                            ip_neighbor_mk_complete_walk, ipn);
594         }
595       else
596         {
597           /*
598            * no matching ARP entry.
599            * construct the rewrite required to for an ARP packet, and stick
600            * that in the adj's pipe to smoke.
601            */
602           adj_nbr_update_rewrite
603             (ai,
604              ADJ_NBR_REWRITE_FLAG_INCOMPLETE,
605              ethernet_build_rewrite
606              (vnm,
607               adj->rewrite_header.sw_if_index,
608               VNET_LINK_ARP,
609               VNET_REWRITE_FOR_SW_INTERFACE_ADDRESS_BROADCAST));
610
611           /*
612            * since the FIB has added this adj for a route, it makes sense it
613            * may want to forward traffic sometime soon. Let's send a
614            * speculative ARP. just one. If we were to do periodically that
615            * wouldn't be bad either, but that's more code than i'm prepared to
616            * write at this time for relatively little reward.
617            */
618           /*
619            * adj_nbr_update_rewrite may actually call fib_walk_sync.
620            * fib_walk_sync may allocate a new adjacency and potentially cause
621            * a realloc for adj_pool. When that happens, adj pointer is no
622            * longer valid here.x We refresh adj pointer accordingly.
623            */
624           adj = adj_get (ai);
625           ip_neighbor_probe (adj);
626         }
627       break;
628     case IP_LOOKUP_NEXT_GLEAN:
629     case IP_LOOKUP_NEXT_BCAST:
630     case IP_LOOKUP_NEXT_MCAST:
631     case IP_LOOKUP_NEXT_DROP:
632     case IP_LOOKUP_NEXT_PUNT:
633     case IP_LOOKUP_NEXT_LOCAL:
634     case IP_LOOKUP_NEXT_REWRITE:
635     case IP_LOOKUP_NEXT_MCAST_MIDCHAIN:
636     case IP_LOOKUP_NEXT_MIDCHAIN:
637     case IP_LOOKUP_NEXT_ICMP_ERROR:
638     case IP_LOOKUP_N_NEXT:
639       ASSERT (0);
640       break;
641     }
642 }
643
644 void
645 ip_neighbor_learn (const ip_neighbor_learn_t * l)
646 {
647   ip_neighbor_add (&l->ip, l->type, &l->mac, l->sw_if_index,
648                    IP_NEIGHBOR_FLAG_DYNAMIC, NULL);
649 }
650
651 static clib_error_t *
652 ip_neighbor_cmd (vlib_main_t * vm,
653                  unformat_input_t * input, vlib_cli_command_t * cmd)
654 {
655   ip46_address_t ip = ip46_address_initializer;
656   mac_address_t mac = ZERO_MAC_ADDRESS;
657   vnet_main_t *vnm = vnet_get_main ();
658   ip_neighbor_flags_t flags;
659   u32 sw_if_index = ~0;
660   int is_add = 1;
661   int count = 1;
662
663   flags = IP_NEIGHBOR_FLAG_DYNAMIC;
664
665   while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT)
666     {
667       /* set ip arp TenGigE1/1/0/1 1.2.3.4 aa:bb:... or aabb.ccdd... */
668       if (unformat (input, "%U %U %U",
669                     unformat_vnet_sw_interface, vnm, &sw_if_index,
670                     unformat_ip46_address, &ip, IP46_TYPE_ANY,
671                     unformat_mac_address_t, &mac))
672         ;
673       else if (unformat (input, "delete") || unformat (input, "del"))
674         is_add = 0;
675       else if (unformat (input, "static"))
676         {
677           flags |= IP_NEIGHBOR_FLAG_STATIC;
678           flags &= ~IP_NEIGHBOR_FLAG_DYNAMIC;
679         }
680       else if (unformat (input, "no-fib-entry"))
681         flags |= IP_NEIGHBOR_FLAG_NO_FIB_ENTRY;
682       else if (unformat (input, "count %d", &count))
683         ;
684       else
685         break;
686     }
687
688   if (sw_if_index == ~0 ||
689       ip46_address_is_zero (&ip) || mac_address_is_zero (&mac))
690     return clib_error_return (0,
691                               "specify interface, IP address and MAC: `%U'",
692                               format_unformat_error, input);
693
694   while (count)
695     {
696       if (is_add)
697         ip_neighbor_add (&ip, ip46_address_get_type (&ip), &mac, sw_if_index,
698                          flags, NULL);
699       else
700         ip_neighbor_del (&ip, ip46_address_get_type (&ip), sw_if_index);
701
702       ip46_address_increment (ip46_address_get_type (&ip), &ip);
703       mac_address_increment (&mac);
704
705       --count;
706     }
707
708   return NULL;
709 }
710
711 /* *INDENT-OFF* */
712 /*?
713  * Add or delete IPv4 ARP cache entries.
714  *
715  * @note 'set ip neighbor' options (e.g. delete, static, 'fib-id <id>',
716  * 'count <number>', 'interface ip4_addr mac_addr') can be added in
717  * any order and combination.
718  *
719  * @cliexpar
720  * @parblock
721  * Add or delete IPv4 ARP cache entries as follows. MAC Address can be in
722  * either aa:bb:cc:dd:ee:ff format or aabb.ccdd.eeff format.
723  * @cliexcmd{set ip neighbor GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
724  * @cliexcmd{set ip neighbor delete GigabitEthernet2/0/0 6.0.0.3 de:ad:be:ef:ba:be}
725  *
726  * To add or delete an IPv4 ARP cache entry to or from a specific fib
727  * table:
728  * @cliexcmd{set ip neighbor fib-id 1 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
729  * @cliexcmd{set ip neighbor fib-id 1 delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
730  *
731  * Add or delete IPv4 static ARP cache entries as follows:
732  * @cliexcmd{set ip neighbor static GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
733  * @cliexcmd{set ip neighbor static delete GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
734  *
735  * For testing / debugging purposes, the 'set ip neighbor' command can add or
736  * delete multiple entries. Supply the 'count N' parameter:
737  * @cliexcmd{set ip neighbor count 10 GigabitEthernet2/0/0 6.0.0.3 dead.beef.babe}
738  * @endparblock
739  ?*/
740 VLIB_CLI_COMMAND (ip_neighbor_command, static) = {
741   .path = "set ip neighbor",
742   .short_help =
743   "set ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
744   .function = ip_neighbor_cmd,
745 };
746 VLIB_CLI_COMMAND (ip_neighbor_command2, static) = {
747   .path = "ip neighbor",
748   .short_help =
749   "ip neighbor [del] <intfc> <ip-address> <mac-address> [static] [no-fib-entry] [count <count>] [fib-id <fib-id>] [proxy <lo-addr> - <hi-addr>]",
750   .function = ip_neighbor_cmd,
751 };
752 /* *INDENT-ON* */
753
754 static int
755 ip_neighbor_sort (void *a1, void *a2)
756 {
757   index_t *ipni1 = a1, *ipni2 = a2;
758   ip_neighbor_t *ipn1, *ipn2;
759   int cmp;
760
761   ipn1 = ip_neighbor_get (*ipni1);
762   ipn2 = ip_neighbor_get (*ipni2);
763
764   cmp = vnet_sw_interface_compare (vnet_get_main (),
765                                    ipn1->ipn_key->ipnk_sw_if_index,
766                                    ipn2->ipn_key->ipnk_sw_if_index);
767   if (!cmp)
768     cmp = ip46_address_cmp (&ipn1->ipn_key->ipnk_ip, &ipn2->ipn_key->ipnk_ip);
769   return cmp;
770 }
771
772 static index_t *
773 ip_neighbor_entries (u32 sw_if_index, ip46_type_t type)
774 {
775   index_t *ipnis = NULL;
776   ip_neighbor_t *ipn;
777
778   /* *INDENT-OFF* */
779   pool_foreach (ipn, ip_neighbor_pool,
780   ({
781     if (sw_if_index != ~0 &&
782         ipn->ipn_key->ipnk_sw_if_index != sw_if_index &&
783         (IP46_TYPE_ANY == type ||
784          (ipn->ipn_key->ipnk_type == type)))
785       continue;
786     vec_add1 (ipnis, ip_neighbor_get_index(ipn));
787   }));
788
789   /* *INDENT-ON* */
790
791   if (ipnis)
792     vec_sort_with_function (ipnis, ip_neighbor_sort);
793   return ipnis;
794 }
795
796 static clib_error_t *
797 ip_neighbor_show_sorted_i (vlib_main_t * vm,
798                            unformat_input_t * input,
799                            vlib_cli_command_t * cmd, ip46_type_t type)
800 {
801   ip_neighbor_elt_t *elt, *head;
802
803   head = pool_elt_at_index (ip_neighbor_elt_pool,
804                             ip_neighbor_list_head[type]);
805
806
807   vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
808                    "Flags", "Ethernet", "Interface");
809
810   /* *INDENT-OFF*/
811   /* the list is time sorted, newest first, so start from the back
812    * and work forwards. Stop when we get to one that is alive */
813   clib_llist_foreach_reverse(ip_neighbor_elt_pool,
814                              ipne_anchor, head, elt,
815   ({
816     vlib_cli_output (vm, "%U", format_ip_neighbor, elt->ipne_index);
817   }));
818   /* *INDENT-ON*/
819
820   return (NULL);
821 }
822
823 static clib_error_t *
824 ip_neighbor_show_i (vlib_main_t * vm,
825                     unformat_input_t * input,
826                     vlib_cli_command_t * cmd, ip46_type_t type)
827 {
828   index_t *ipni, *ipnis = NULL;
829   u32 sw_if_index;
830
831   /* Filter entries by interface if given. */
832   sw_if_index = ~0;
833   (void) unformat_user (input, unformat_vnet_sw_interface, vnet_get_main (),
834                         &sw_if_index);
835
836   ipnis = ip_neighbor_entries (sw_if_index, type);
837
838   if (ipnis)
839     vlib_cli_output (vm, "%=12s%=40s%=6s%=20s%=24s", "Time", "IP",
840                      "Flags", "Ethernet", "Interface");
841
842   vec_foreach (ipni, ipnis)
843   {
844     vlib_cli_output (vm, "%U", format_ip_neighbor, *ipni);
845   }
846   vec_free (ipnis);
847
848   return (NULL);
849 }
850
851 static clib_error_t *
852 ip_neighbor_show (vlib_main_t * vm,
853                   unformat_input_t * input, vlib_cli_command_t * cmd)
854 {
855   return (ip_neighbor_show_i (vm, input, cmd, IP46_TYPE_ANY));
856 }
857
858 static clib_error_t *
859 ip6_neighbor_show (vlib_main_t * vm,
860                    unformat_input_t * input, vlib_cli_command_t * cmd)
861 {
862   return (ip_neighbor_show_i (vm, input, cmd, IP46_TYPE_IP6));
863 }
864
865 static clib_error_t *
866 ip4_neighbor_show (vlib_main_t * vm,
867                    unformat_input_t * input, vlib_cli_command_t * cmd)
868 {
869   return (ip_neighbor_show_i (vm, input, cmd, IP46_TYPE_IP4));
870 }
871
872 static clib_error_t *
873 ip6_neighbor_show_sorted (vlib_main_t * vm,
874                           unformat_input_t * input, vlib_cli_command_t * cmd)
875 {
876   return (ip_neighbor_show_sorted_i (vm, input, cmd, IP46_TYPE_IP6));
877 }
878
879 static clib_error_t *
880 ip4_neighbor_show_sorted (vlib_main_t * vm,
881                           unformat_input_t * input, vlib_cli_command_t * cmd)
882 {
883   return (ip_neighbor_show_sorted_i (vm, input, cmd, IP46_TYPE_IP4));
884 }
885
886 /*?
887  * Display all the IP neighbor entries.
888  *
889  * @cliexpar
890  * Example of how to display the IPv4 ARP table:
891  * @cliexstart{show ip neighbor}
892  *    Time      FIB        IP4       Flags      Ethernet              Interface
893  *    346.3028   0       6.1.1.3            de:ad:be:ef:ba:be   GigabitEthernet2/0/0
894  *   3077.4271   0       6.1.1.4       S    de:ad:be:ef:ff:ff   GigabitEthernet2/0/0
895  *   2998.6409   1       6.2.2.3            de:ad:be:ef:00:01   GigabitEthernet2/0/0
896  * Proxy arps enabled for:
897  * Fib_index 0   6.0.0.1 - 6.0.0.11
898  * @cliexend
899  ?*/
900 /* *INDENT-OFF* */
901 VLIB_CLI_COMMAND (show_ip_neighbors_cmd_node, static) = {
902   .path = "show ip neighbors",
903   .function = ip_neighbor_show,
904   .short_help = "show ip neighbors [interface]",
905 };
906 VLIB_CLI_COMMAND (show_ip4_neighbors_cmd_node, static) = {
907   .path = "show ip4 neighbors",
908   .function = ip4_neighbor_show,
909   .short_help = "show ip4 neighbors [interface]",
910 };
911 VLIB_CLI_COMMAND (show_ip6_neighbors_cmd_node, static) = {
912   .path = "show ip6 neighbors",
913   .function = ip6_neighbor_show,
914   .short_help = "show ip6 neighbors [interface]",
915 };
916 VLIB_CLI_COMMAND (show_ip_neighbor_cmd_node, static) = {
917   .path = "show ip neighbor",
918   .function = ip_neighbor_show,
919   .short_help = "show ip neighbor [interface]",
920 };
921 VLIB_CLI_COMMAND (show_ip4_neighbor_cmd_node, static) = {
922   .path = "show ip4 neighbor",
923   .function = ip4_neighbor_show,
924   .short_help = "show ip4 neighbor [interface]",
925 };
926 VLIB_CLI_COMMAND (show_ip6_neighbor_cmd_node, static) = {
927   .path = "show ip6 neighbor",
928   .function = ip6_neighbor_show,
929   .short_help = "show ip6 neighbor [interface]",
930 };
931 VLIB_CLI_COMMAND (show_ip4_neighbor_sorted_cmd_node, static) = {
932   .path = "show ip4 neighbor-sorted",
933   .function = ip4_neighbor_show_sorted,
934   .short_help = "show ip4 neighbor-sorted",
935 };
936 VLIB_CLI_COMMAND (show_ip6_neighbor_sorted_cmd_node, static) = {
937   .path = "show ip6 neighbor-sorted",
938   .function = ip6_neighbor_show_sorted,
939   .short_help = "show ip6 neighbor-sorted",
940 };
941 /* *INDENT-ON* */
942
943 static ip_neighbor_vft_t ip_nbr_vfts[IP46_N_TYPES];
944
945 void
946 ip_neighbor_register (ip46_type_t type, const ip_neighbor_vft_t * vft)
947 {
948   ip_nbr_vfts[type] = *vft;
949 }
950
951 void
952 ip_neighbor_probe_dst (const ip_adjacency_t * adj, const ip46_address_t * dst)
953 {
954   if (!vnet_sw_interface_is_admin_up (vnet_get_main (),
955                                       adj->rewrite_header.sw_if_index))
956     return;
957
958   switch (adj->ia_nh_proto)
959     {
960     case FIB_PROTOCOL_IP6:
961       ip6_neighbor_probe_dst (adj, &dst->ip6);
962       break;
963     case FIB_PROTOCOL_IP4:
964       ip4_neighbor_probe_dst (adj, &dst->ip4);
965       break;
966     case FIB_PROTOCOL_MPLS:
967       ASSERT (0);
968       break;
969     }
970 }
971
972 void
973 ip_neighbor_probe (const ip_adjacency_t * adj)
974 {
975   ip_neighbor_probe_dst (adj, &adj->sub_type.nbr.next_hop);
976 }
977
978 void
979 ip_neighbor_advertise (vlib_main_t * vm,
980                        ip46_type_t type,
981                        const ip46_address_t * addr, u32 sw_if_index)
982 {
983   vnet_main_t *vnm = vnet_get_main ();
984
985   if (type == IP46_TYPE_IP4 || type == IP46_TYPE_BOTH)
986     ip4_neighbor_advertise (vm, vnm, sw_if_index, (addr) ? &addr->ip4 : NULL);
987   if (type == IP46_TYPE_IP6 || type == IP46_TYPE_BOTH)
988     ip6_neighbor_advertise (vm, vnm, sw_if_index, (addr) ? &addr->ip6 : NULL);
989 }
990
991 void
992 ip_neighbor_walk (ip46_type_t type,
993                   u32 sw_if_index, ip_neighbor_walk_cb_t cb, void *ctx)
994 {
995   ip_neighbor_key_t *key;
996   index_t ipni;
997
998   if (~0 == sw_if_index)
999     {
1000       uword **hash;
1001
1002       vec_foreach (hash, ip_neighbor_db[type].ipndb_hash)
1003       {
1004           /* *INDENT-OFF* */
1005           hash_foreach (key, ipni, *hash,
1006           ({
1007             cb (ipni, ctx);
1008           }));
1009           /* *INDENT-ON* */
1010       }
1011     }
1012   else
1013     {
1014       uword *hash;
1015
1016       if (vec_len (ip_neighbor_db[type].ipndb_hash) <= sw_if_index)
1017         return;
1018       hash = ip_neighbor_db[type].ipndb_hash[sw_if_index];
1019
1020       /* *INDENT-OFF* */
1021       hash_foreach (key, ipni, hash,
1022       ({
1023         cb (ipni, ctx);
1024       }));
1025       /* *INDENT-ON* */
1026     }
1027 }
1028
1029 int
1030 ip4_neighbor_proxy_add (u32 fib_index,
1031                         const ip4_address_t * start,
1032                         const ip4_address_t * end)
1033 {
1034   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_add)
1035     {
1036       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_add
1037               (fib_index, start, end));
1038     }
1039
1040   return (-1);
1041 }
1042
1043 int
1044 ip4_neighbor_proxy_delete (u32 fib_index,
1045                            const ip4_address_t * start,
1046                            const ip4_address_t * end)
1047 {
1048   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_del)
1049     {
1050       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_del
1051               (fib_index, start, end));
1052     }
1053   return -1;
1054 }
1055
1056 int
1057 ip4_neighbor_proxy_enable (u32 sw_if_index)
1058 {
1059   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_enable)
1060     {
1061       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_enable (sw_if_index));
1062     }
1063   return -1;
1064 }
1065
1066 int
1067 ip4_neighbor_proxy_disable (u32 sw_if_index)
1068 {
1069   if (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_disable)
1070     {
1071       return (ip_nbr_vfts[IP46_TYPE_IP4].inv_proxy4_disable (sw_if_index));
1072     }
1073   return -1;
1074 }
1075
1076 int
1077 ip6_neighbor_proxy_add (u32 sw_if_index, const ip6_address_t * addr)
1078 {
1079   if (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_add)
1080     {
1081       return (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_add (sw_if_index, addr));
1082     }
1083   return -1;
1084 }
1085
1086 int
1087 ip6_neighbor_proxy_del (u32 sw_if_index, const ip6_address_t * addr)
1088 {
1089   if (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_del)
1090     {
1091       return (ip_nbr_vfts[IP46_TYPE_IP6].inv_proxy6_del (sw_if_index, addr));
1092     }
1093   return -1;
1094 }
1095
1096 static void
1097 ip_neighbor_ethernet_change_mac (ethernet_main_t * em,
1098                                  u32 sw_if_index, uword opaque)
1099 {
1100   ip_neighbor_t *ipn;
1101   adj_index_t ai;
1102
1103   IP_NEIGHBOR_DBG ("mac-change: %U",
1104                    format_vnet_sw_if_index_name, vnet_get_main (),
1105                    sw_if_index);
1106
1107   /* *INDENT-OFF* */
1108   pool_foreach (ipn, ip_neighbor_pool,
1109   ({
1110     if (ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1111       adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1112                        fib_proto_from_ip46(ipn->ipn_key->ipnk_type),
1113                        &ipn->ipn_key->ipnk_ip,
1114                        ip_neighbor_mk_complete_walk,
1115                        ipn);
1116   }));
1117   /* *INDENT-ON* */
1118
1119   ai = adj_glean_get (FIB_PROTOCOL_IP4, sw_if_index);
1120
1121   if (ADJ_INDEX_INVALID != ai)
1122     adj_glean_update_rewrite (ai);
1123 }
1124
1125 void
1126 ip_neighbor_populate (ip46_type_t type, u32 sw_if_index)
1127 {
1128   index_t *ipnis = NULL, *ipni;
1129   ip_neighbor_t *ipn;
1130
1131   IP_NEIGHBOR_DBG ("populate: %U %U",
1132                    format_vnet_sw_if_index_name, vnet_get_main (),
1133                    sw_if_index, format_ip46_type, type);
1134
1135   /* *INDENT-OFF* */
1136   pool_foreach (ipn, ip_neighbor_pool,
1137   ({
1138     if (ipn->ipn_key->ipnk_type == type &&
1139         ipn->ipn_key->ipnk_sw_if_index == sw_if_index)
1140       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1141   }));
1142   /* *INDENT-ON* */
1143
1144   vec_foreach (ipni, ipnis)
1145   {
1146     ipn = ip_neighbor_get (*ipni);
1147
1148     adj_nbr_walk_nh (ipn->ipn_key->ipnk_sw_if_index,
1149                      fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
1150                      &ipn->ipn_key->ipnk_ip,
1151                      ip_neighbor_mk_complete_walk, ipn);
1152   }
1153   vec_free (ipnis);
1154 }
1155
1156 void
1157 ip_neighbor_flush (ip46_type_t type, u32 sw_if_index)
1158 {
1159   index_t *ipnis = NULL, *ipni;
1160   ip_neighbor_t *ipn;
1161
1162   IP_NEIGHBOR_DBG ("flush: %U %U",
1163                    format_vnet_sw_if_index_name, vnet_get_main (),
1164                    sw_if_index, format_ip46_type, type);
1165
1166   /* *INDENT-OFF* */
1167   pool_foreach (ipn, ip_neighbor_pool,
1168   ({
1169     if (ipn->ipn_key->ipnk_type == type &&
1170         ipn->ipn_key->ipnk_sw_if_index == sw_if_index &&
1171         ip_neighbor_is_dynamic (ipn))
1172       vec_add1 (ipnis, ipn - ip_neighbor_pool);
1173   }));
1174   /* *INDENT-ON* */
1175
1176   vec_foreach (ipni, ipnis) ip_neighbor_free (ip_neighbor_get (*ipni));
1177   vec_free (ipnis);
1178 }
1179
1180 /*
1181  * Remove any arp entries associated with the specified interface
1182  */
1183 static clib_error_t *
1184 ip_neighbor_interface_admin_change (vnet_main_t * vnm,
1185                                     u32 sw_if_index, u32 flags)
1186 {
1187   ip46_type_t type;
1188
1189   IP_NEIGHBOR_DBG ("interface-admin: %U  %s",
1190                    format_vnet_sw_if_index_name, vnet_get_main (),
1191                    sw_if_index,
1192                    (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP ? "up" : "down"));
1193
1194   if (flags & VNET_SW_INTERFACE_FLAG_ADMIN_UP)
1195     {
1196       FOREACH_IP46_TYPE (type) ip_neighbor_populate (type, sw_if_index);
1197     }
1198   else
1199     {
1200       /* admin down, flush all neighbours */
1201       FOREACH_IP46_TYPE (type) ip_neighbor_flush (type, sw_if_index);
1202     }
1203
1204   return (NULL);
1205 }
1206
1207 VNET_SW_INTERFACE_ADMIN_UP_DOWN_FUNCTION (ip_neighbor_interface_admin_change);
1208
1209 /*
1210  * Remove any arp entries associated with the specified interface
1211  */
1212 static clib_error_t *
1213 ip_neighbor_delete_sw_interface (vnet_main_t * vnm,
1214                                  u32 sw_if_index, u32 is_add)
1215 {
1216   IP_NEIGHBOR_DBG ("interface-change: %U  %s",
1217                    format_vnet_sw_if_index_name, vnet_get_main (),
1218                    sw_if_index, (is_add ? "add" : "del"));
1219
1220   if (!is_add && sw_if_index != ~0)
1221     {
1222       ip46_type_t type;
1223
1224       FOREACH_IP46_TYPE (type) ip_neighbor_flush (type, sw_if_index);
1225     }
1226
1227   return (NULL);
1228 }
1229
1230 VNET_SW_INTERFACE_ADD_DEL_FUNCTION (ip_neighbor_delete_sw_interface);
1231
1232 typedef struct ip_neighbor_walk_covered_ctx_t_
1233 {
1234   ip46_type_t type;
1235   ip46_address_t addr;
1236   u32 length;
1237   index_t *ipnis;
1238 } ip_neighbor_walk_covered_ctx_t;
1239
1240 static walk_rc_t
1241 ip_neighbor_walk_covered (index_t ipni, void *arg)
1242 {
1243   ip_neighbor_walk_covered_ctx_t *ctx = arg;
1244   ip_neighbor_t *ipn;
1245
1246   ipn = ip_neighbor_get (ipni);
1247
1248   ASSERT (ipn->ipn_key->ipnk_type == ctx->type);
1249
1250   if (IP46_TYPE_IP4 == ctx->type)
1251     {
1252       if (ip4_destination_matches_route (&ip4_main,
1253                                          &ipn->ipn_key->ipnk_ip.ip4,
1254                                          &ctx->addr.ip4,
1255                                          ctx->length) &&
1256           ip_neighbor_is_dynamic (ipn))
1257         {
1258           vec_add1 (ctx->ipnis, ip_neighbor_get_index (ipn));
1259         }
1260     }
1261   return (WALK_CONTINUE);
1262 }
1263
1264
1265 /*
1266  * callback when an interface address is added or deleted
1267  */
1268 static void
1269 ip_neighbor_add_del_interface_address_v4 (ip4_main_t * im,
1270                                           uword opaque,
1271                                           u32 sw_if_index,
1272                                           ip4_address_t * address,
1273                                           u32 address_length,
1274                                           u32 if_address_index, u32 is_del)
1275 {
1276   /*
1277    * Flush the ARP cache of all entries covered by the address
1278    * that is being removed.
1279    */
1280   IP_NEIGHBOR_DBG ("addr-%d: %U, %U/%d",
1281                    (is_del ? "del" : "add"),
1282                    format_vnet_sw_if_index_name, vnet_get_main (),
1283                    sw_if_index, format_ip4_address, address, address_length);
1284
1285   if (is_del)
1286     {
1287       ip_neighbor_walk_covered_ctx_t ctx = {
1288         .addr.ip4 = *address,
1289         .type = IP46_TYPE_IP4,
1290         .length = address_length,
1291       };
1292       index_t *ipni;
1293
1294       ip_neighbor_walk (IP46_TYPE_IP4, sw_if_index,
1295                         ip_neighbor_walk_covered, &ctx);
1296
1297       vec_foreach (ipni, ctx.ipnis)
1298         ip_neighbor_free (ip_neighbor_get (*ipni));
1299
1300       vec_free (ctx.ipnis);
1301     }
1302 }
1303
1304 /*
1305  * callback when an interface address is added or deleted
1306  */
1307 static void
1308 ip_neighbor_add_del_interface_address_v6 (ip6_main_t * im,
1309                                           uword opaque,
1310                                           u32 sw_if_index,
1311                                           ip6_address_t * address,
1312                                           u32 address_length,
1313                                           u32 if_address_index, u32 is_del)
1314 {
1315   /*
1316    * Flush the ARP cache of all entries covered by the address
1317    * that is being removed.
1318    */
1319   IP_NEIGHBOR_DBG ("addr-change: %U, %U/%d %s",
1320                    format_vnet_sw_if_index_name, vnet_get_main (),
1321                    sw_if_index, format_ip6_address, address, address_length,
1322                    (is_del ? "del" : "add"));
1323
1324   if (is_del)
1325     {
1326       ip_neighbor_walk_covered_ctx_t ctx = {
1327         .addr.ip6 = *address,
1328         .type = IP46_TYPE_IP6,
1329         .length = address_length,
1330       };
1331       index_t *ipni;
1332
1333       ip_neighbor_walk (IP46_TYPE_IP6, sw_if_index,
1334                         ip_neighbor_walk_covered, &ctx);
1335
1336       vec_foreach (ipni, ctx.ipnis)
1337         ip_neighbor_free (ip_neighbor_get (*ipni));
1338
1339       vec_free (ctx.ipnis);
1340     }
1341 }
1342
1343 typedef struct ip_neighbor_table_bind_ctx_t_
1344 {
1345   u32 new_fib_index;
1346   u32 old_fib_index;
1347 } ip_neighbor_table_bind_ctx_t;
1348
1349 static walk_rc_t
1350 ip_neighbor_walk_table_bind (index_t ipni, void *arg)
1351 {
1352   ip_neighbor_table_bind_ctx_t *ctx = arg;
1353   ip_neighbor_t *ipn;
1354
1355   ipn = ip_neighbor_get (ipni);
1356   ip_neighbor_adj_fib_remove (ipn, ctx->old_fib_index);
1357   ip_neighbor_adj_fib_add (ipn, ctx->new_fib_index);
1358
1359   return (WALK_CONTINUE);
1360 }
1361
1362 static void
1363 ip_neighbor_table_bind_v4 (ip4_main_t * im,
1364                            uword opaque,
1365                            u32 sw_if_index,
1366                            u32 new_fib_index, u32 old_fib_index)
1367 {
1368   ip_neighbor_table_bind_ctx_t ctx = {
1369     .old_fib_index = old_fib_index,
1370     .new_fib_index = new_fib_index,
1371   };
1372
1373   ip_neighbor_walk (IP46_TYPE_IP4, sw_if_index,
1374                     ip_neighbor_walk_table_bind, &ctx);
1375 }
1376
1377 static void
1378 ip_neighbor_table_bind_v6 (ip6_main_t * im,
1379                            uword opaque,
1380                            u32 sw_if_index,
1381                            u32 new_fib_index, u32 old_fib_index)
1382 {
1383   ip_neighbor_table_bind_ctx_t ctx = {
1384     .old_fib_index = old_fib_index,
1385     .new_fib_index = new_fib_index,
1386   };
1387
1388   ip_neighbor_walk (IP46_TYPE_IP6, sw_if_index,
1389                     ip_neighbor_walk_table_bind, &ctx);
1390 }
1391
1392 typedef enum ip_neighbor_age_state_t_
1393 {
1394   IP_NEIGHBOR_AGE_ALIVE,
1395   IP_NEIGHBOR_AGE_PROBE,
1396   IP_NEIGHBOR_AGE_DEAD,
1397 } ip_neighbor_age_state_t;
1398
1399 #define IP_NEIGHBOR_PROCESS_SLEEP_LONG (0)
1400
1401 static ip_neighbor_age_state_t
1402 ip_neighbour_age_out (index_t ipni, f64 now, f64 * wait)
1403 {
1404   ip_neighbor_t *ipn;
1405   u32 ipndb_age;
1406   u32 ttl;
1407
1408   ipn = ip_neighbor_get (ipni);
1409   ipndb_age = ip_neighbor_db[ipn->ipn_key->ipnk_type].ipndb_age;
1410   ttl = now - ipn->ipn_time_last_updated;
1411   *wait = ipndb_age;
1412
1413   if (ttl > ipndb_age)
1414     {
1415       IP_NEIGHBOR_DBG ("aged: %U @%f - %f > %d",
1416                        format_ip_neighbor, ipni, now,
1417                        ipn->ipn_time_last_updated, ipndb_age);
1418       if (ipn->ipn_n_probes > 2)
1419         {
1420           /* 3 strikes and yea-re out */
1421           IP_NEIGHBOR_DBG ("dead: %U", format_ip_neighbor, ipni);
1422           *wait = 1;
1423           return (IP_NEIGHBOR_AGE_DEAD);
1424         }
1425       else
1426         {
1427           adj_index_t ai;
1428
1429           ai = adj_glean_get (fib_proto_from_ip46 (ipn->ipn_key->ipnk_type),
1430                               ip_neighbor_get_sw_if_index (ipn));
1431
1432           if (ADJ_INDEX_INVALID != ai)
1433             ip_neighbor_probe_dst (adj_get (ai), ip_neighbor_get_ip (ipn));
1434
1435           ipn->ipn_n_probes++;
1436           *wait = 1;
1437         }
1438     }
1439   else
1440     {
1441       /* here we are sure that ttl <= ipndb_age */
1442       *wait = ipndb_age - ttl + 1;
1443       return (IP_NEIGHBOR_AGE_ALIVE);
1444     }
1445
1446   return (IP_NEIGHBOR_AGE_PROBE);
1447 }
1448
1449 typedef enum ip_neighbor_process_event_t_
1450 {
1451   IP_NEIGHBOR_AGE_PROCESS_WAKEUP,
1452 } ip_neighbor_process_event_t;
1453
1454 static uword
1455 ip_neighbor_age_loop (vlib_main_t * vm,
1456                       vlib_node_runtime_t * rt,
1457                       vlib_frame_t * f, ip46_type_t type)
1458 {
1459   uword event_type, *event_data = NULL;
1460   f64 timeout;
1461
1462   /* Set the timeout to an effectively infinite value when the process starts */
1463   timeout = IP_NEIGHBOR_PROCESS_SLEEP_LONG;
1464
1465   while (1)
1466     {
1467       f64 now;
1468
1469       if (!timeout)
1470         vlib_process_wait_for_event (vm);
1471       else
1472         vlib_process_wait_for_event_or_clock (vm, timeout);
1473
1474       event_type = vlib_process_get_events (vm, &event_data);
1475       vec_reset_length (event_data);
1476
1477       now = vlib_time_now (vm);
1478
1479       switch (event_type)
1480         {
1481         case ~0:
1482           {
1483             /* timer expired */
1484             ip_neighbor_elt_t *elt, *head;
1485             f64 wait;
1486
1487             timeout = ip_neighbor_db[type].ipndb_age;
1488             head = pool_elt_at_index (ip_neighbor_elt_pool,
1489                                       ip_neighbor_list_head[type]);
1490
1491           /* *INDENT-OFF*/
1492           /* the list is time sorted, newest first, so start from the back
1493            * and work forwards. Stop when we get to one that is alive */
1494           restart:
1495           clib_llist_foreach_reverse(ip_neighbor_elt_pool,
1496                                      ipne_anchor, head, elt,
1497           ({
1498             ip_neighbor_age_state_t res;
1499
1500             res = ip_neighbour_age_out(elt->ipne_index, now, &wait);
1501
1502             if (IP_NEIGHBOR_AGE_ALIVE == res) {
1503               /* the oldest neighbor has not yet expired, go back to sleep */
1504               timeout = clib_min (wait, timeout);
1505               break;
1506             }
1507             else if (IP_NEIGHBOR_AGE_DEAD == res) {
1508               /* the oldest neighbor is dead, pop it, then restart the walk
1509                * again from the back */
1510               ip_neighbor_free (ip_neighbor_get(elt->ipne_index));
1511               goto restart;
1512             }
1513
1514             timeout = clib_min (wait, timeout);
1515           }));
1516           /* *INDENT-ON* */
1517             break;
1518           }
1519         case IP_NEIGHBOR_AGE_PROCESS_WAKEUP:
1520           {
1521
1522             if (!ip_neighbor_db[type].ipndb_age)
1523               {
1524                 /* aging has been disabled */
1525                 timeout = 0;
1526                 break;
1527               }
1528             ip_neighbor_elt_t *elt, *head;
1529
1530             head = pool_elt_at_index (ip_neighbor_elt_pool,
1531                                       ip_neighbor_list_head[type]);
1532             /* no neighbors yet */
1533             if (clib_llist_is_empty (ip_neighbor_elt_pool, ipne_anchor, head))
1534               {
1535                 timeout = ip_neighbor_db[type].ipndb_age;
1536                 break;
1537               }
1538
1539             /* poke the oldset neighbour for aging, which returns how long we sleep for */
1540             elt = clib_llist_prev (ip_neighbor_elt_pool, ipne_anchor, head);
1541             ip_neighbour_age_out (elt->ipne_index, now, &timeout);
1542             break;
1543           }
1544         }
1545     }
1546   return 0;
1547 }
1548
1549 static uword
1550 ip4_neighbor_age_process (vlib_main_t * vm,
1551                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1552 {
1553   return (ip_neighbor_age_loop (vm, rt, f, IP46_TYPE_IP4));
1554 }
1555
1556 static uword
1557 ip6_neighbor_age_process (vlib_main_t * vm,
1558                           vlib_node_runtime_t * rt, vlib_frame_t * f)
1559 {
1560   return (ip_neighbor_age_loop (vm, rt, f, IP46_TYPE_IP6));
1561 }
1562
1563 /* *INDENT-OFF* */
1564 VLIB_REGISTER_NODE (ip4_neighbor_age_process_node,static) = {
1565   .function = ip4_neighbor_age_process,
1566   .type = VLIB_NODE_TYPE_PROCESS,
1567   .name = "ip4-neighbor-age-process",
1568 };
1569 VLIB_REGISTER_NODE (ip6_neighbor_age_process_node,static) = {
1570   .function = ip6_neighbor_age_process,
1571   .type = VLIB_NODE_TYPE_PROCESS,
1572   .name = "ip6-neighbor-age-process",
1573 };
1574 /* *INDENT-ON* */
1575
1576 int
1577 ip_neighbor_config (ip46_type_t type, u32 limit, u32 age, bool recycle)
1578 {
1579   ip_neighbor_db[type].ipndb_limit = limit;
1580   ip_neighbor_db[type].ipndb_recycle = recycle;
1581   ip_neighbor_db[type].ipndb_age = age;
1582
1583   vlib_process_signal_event (vlib_get_main (),
1584                              (IP46_TYPE_IP4 == type ?
1585                               ip4_neighbor_age_process_node.index :
1586                               ip6_neighbor_age_process_node.index),
1587                              IP_NEIGHBOR_AGE_PROCESS_WAKEUP, 0);
1588
1589   return (0);
1590 }
1591
1592 static clib_error_t *
1593 ip_neighbor_config_show (vlib_main_t * vm,
1594                          unformat_input_t * input, vlib_cli_command_t * cmd)
1595 {
1596   ip46_type_t type;
1597
1598   /* *INDENT-OFF* */
1599   FOREACH_IP46_TYPE(type) {
1600     vlib_cli_output (vm, "%U:", format_ip46_type, type);
1601     vlib_cli_output (vm, "  limit:%d, age:%d, recycle:%d",
1602                      ip_neighbor_db[type].ipndb_limit,
1603                      ip_neighbor_db[type].ipndb_age,
1604                      ip_neighbor_db[type].ipndb_recycle);
1605   }
1606
1607   /* *INDENT-ON* */
1608   return (NULL);
1609 }
1610
1611 /* *INDENT-OFF* */
1612 VLIB_CLI_COMMAND (show_ip_neighbor_cfg_cmd_node, static) = {
1613   .path = "show ip neighbor-config",
1614   .function = ip_neighbor_config_show,
1615   .short_help = "show ip neighbor-config",
1616 };
1617 /* *INDENT-ON* */
1618
1619 static clib_error_t *
1620 ip_neighbor_init (vlib_main_t * vm)
1621 {
1622   {
1623     ip4_add_del_interface_address_callback_t cb = {
1624       .function = ip_neighbor_add_del_interface_address_v4,
1625     };
1626     vec_add1 (ip4_main.add_del_interface_address_callbacks, cb);
1627   }
1628   {
1629     ip6_add_del_interface_address_callback_t cb = {
1630       .function = ip_neighbor_add_del_interface_address_v6,
1631     };
1632     vec_add1 (ip6_main.add_del_interface_address_callbacks, cb);
1633   }
1634   {
1635     ip4_table_bind_callback_t cb = {
1636       .function = ip_neighbor_table_bind_v4,
1637     };
1638     vec_add1 (ip4_main.table_bind_callbacks, cb);
1639   }
1640   {
1641     ip6_table_bind_callback_t cb = {
1642       .function = ip_neighbor_table_bind_v6,
1643     };
1644     vec_add1 (ip6_main.table_bind_callbacks, cb);
1645   }
1646   {
1647     ethernet_address_change_ctx_t ctx = {
1648       .function = ip_neighbor_ethernet_change_mac,
1649       .function_opaque = 0,
1650     };
1651     vec_add1 (ethernet_main.address_change_callbacks, ctx);
1652   }
1653
1654   ipn_logger = vlib_log_register_class ("ip", "neighbor");
1655
1656   ip46_type_t type;
1657
1658   FOREACH_IP46_TYPE (type)
1659     ip_neighbor_list_head[type] =
1660     clib_llist_make_head (ip_neighbor_elt_pool, ipne_anchor);
1661
1662   return (NULL);
1663 }
1664
1665 /* *INDENT-OFF* */
1666 VLIB_INIT_FUNCTION (ip_neighbor_init) =
1667 {
1668   .runs_after = VLIB_INITS("ip_main_init"),
1669 };
1670 /* *INDENT-ON* */
1671
1672 /*
1673  * fd.io coding-style-patch-verification: ON
1674  *
1675  * Local Variables:
1676  * eval: (c-set-style "gnu")
1677  * End:
1678  */